aboutsummaryrefslogtreecommitdiffstats
path: root/net
diff options
context:
space:
mode:
Diffstat (limited to 'net')
-rw-r--r--net/802/Kconfig7
-rw-r--r--net/802/Makefile2
-rw-r--r--net/802/garp.c636
-rw-r--r--net/802/psnap.c1
-rw-r--r--net/802/stp.c102
-rw-r--r--net/8021q/Kconfig10
-rw-r--r--net/8021q/Makefile12
-rw-r--r--net/8021q/vlan.c81
-rw-r--r--net/8021q/vlan.h86
-rw-r--r--net/8021q/vlan_core.c64
-rw-r--r--net/8021q/vlan_dev.c298
-rw-r--r--net/8021q/vlan_gvrp.c66
-rw-r--r--net/8021q/vlan_netlink.c7
-rw-r--r--net/8021q/vlanproc.c13
-rw-r--r--net/Kconfig1
-rw-r--r--net/Makefile4
-rw-r--r--net/atm/addr.c10
-rw-r--r--net/atm/addr.h4
-rw-r--r--net/atm/br2684.c14
-rw-r--r--net/atm/common.c8
-rw-r--r--net/atm/lec.c55
-rw-r--r--net/atm/lec.h10
-rw-r--r--net/ax25/af_ax25.c6
-rw-r--r--net/ax25/ax25_std_timer.c8
-rw-r--r--net/bluetooth/bnep/bnep.h4
-rw-r--r--net/bluetooth/bnep/core.c4
-rw-r--r--net/bluetooth/bnep/netdev.c4
-rw-r--r--net/bluetooth/bnep/sock.c4
-rw-r--r--net/bluetooth/rfcomm/core.c2
-rw-r--r--net/bluetooth/rfcomm/sock.c2
-rw-r--r--net/bluetooth/rfcomm/tty.c2
-rw-r--r--net/bridge/Kconfig1
-rw-r--r--net/bridge/br.c18
-rw-r--r--net/bridge/br_device.c14
-rw-r--r--net/bridge/br_fdb.c3
-rw-r--r--net/bridge/br_forward.c6
-rw-r--r--net/bridge/br_if.c22
-rw-r--r--net/bridge/br_input.c25
-rw-r--r--net/bridge/br_ioctl.c2
-rw-r--r--net/bridge/br_notify.c2
-rw-r--r--net/bridge/br_private.h10
-rw-r--r--net/bridge/br_private_stp.h2
-rw-r--r--net/bridge/br_stp.c3
-rw-r--r--net/bridge/br_stp_bpdu.c14
-rw-r--r--net/bridge/br_stp_if.c6
-rw-r--r--net/bridge/br_stp_timer.c2
-rw-r--r--net/bridge/netfilter/Kconfig11
-rw-r--r--net/bridge/netfilter/Makefile1
-rw-r--r--net/bridge/netfilter/ebt_ip6.c144
-rw-r--r--net/bridge/netfilter/ebt_log.c66
-rw-r--r--net/can/af_can.c10
-rw-r--r--net/can/bcm.c23
-rw-r--r--net/can/raw.c3
-rw-r--r--net/core/dev.c382
-rw-r--r--net/core/dev_mcast.c24
-rw-r--r--net/core/ethtool.c37
-rw-r--r--net/core/fib_rules.c2
-rw-r--r--net/core/flow.c2
-rw-r--r--net/core/link_watch.c11
-rw-r--r--net/core/neighbour.c8
-rw-r--r--net/core/net-sysfs.c26
-rw-r--r--net/core/netpoll.c24
-rw-r--r--net/core/pktgen.c69
-rw-r--r--net/core/rtnetlink.c26
-rw-r--r--net/core/skbuff.c166
-rw-r--r--net/core/sock.c8
-rw-r--r--net/core/sysctl_net_core.c39
-rw-r--r--net/dccp/ccids/ccid3.c14
-rw-r--r--net/dccp/ccids/lib/loss_interval.c10
-rw-r--r--net/dccp/ccids/lib/packet_history.c103
-rw-r--r--net/dccp/ccids/lib/packet_history.h30
-rw-r--r--net/dccp/dccp.h17
-rw-r--r--net/dccp/ipv4.c19
-rw-r--r--net/dccp/ipv6.c15
-rw-r--r--net/dccp/options.c14
-rw-r--r--net/dccp/proto.c4
-rw-r--r--net/dccp/timer.c4
-rw-r--r--net/decnet/af_decnet.c32
-rw-r--r--net/decnet/dn_rules.c2
-rw-r--r--net/econet/af_econet.c4
-rw-r--r--net/ieee80211/ieee80211_rx.c2
-rw-r--r--net/ieee80211/ieee80211_tx.c86
-rw-r--r--net/ieee80211/ieee80211_wx.c137
-rw-r--r--net/ipv4/af_inet.c87
-rw-r--r--net/ipv4/arp.c9
-rw-r--r--net/ipv4/datagram.c2
-rw-r--r--net/ipv4/devinet.c35
-rw-r--r--net/ipv4/fib_frontend.c19
-rw-r--r--net/ipv4/fib_hash.c8
-rw-r--r--net/ipv4/fib_rules.c4
-rw-r--r--net/ipv4/fib_semantics.c2
-rw-r--r--net/ipv4/fib_trie.c47
-rw-r--r--net/ipv4/icmp.c31
-rw-r--r--net/ipv4/igmp.c2
-rw-r--r--net/ipv4/inet_connection_sock.c13
-rw-r--r--net/ipv4/inet_diag.c2
-rw-r--r--net/ipv4/inet_hashtables.c23
-rw-r--r--net/ipv4/inet_timewait_sock.c21
-rw-r--r--net/ipv4/inetpeer.c2
-rw-r--r--net/ipv4/ip_forward.c11
-rw-r--r--net/ipv4/ip_fragment.c61
-rw-r--r--net/ipv4/ip_gre.c32
-rw-r--r--net/ipv4/ip_input.c40
-rw-r--r--net/ipv4/ip_options.c2
-rw-r--r--net/ipv4/ip_output.c35
-rw-r--r--net/ipv4/ip_sockglue.c2
-rw-r--r--net/ipv4/ipconfig.c2
-rw-r--r--net/ipv4/ipip.c24
-rw-r--r--net/ipv4/ipmr.c123
-rw-r--r--net/ipv4/ipvs/ip_vs_app.c2
-rw-r--r--net/ipv4/ipvs/ip_vs_conn.c2
-rw-r--r--net/ipv4/ipvs/ip_vs_core.c5
-rw-r--r--net/ipv4/ipvs/ip_vs_ctl.c2
-rw-r--r--net/ipv4/ipvs/ip_vs_dh.c2
-rw-r--r--net/ipv4/ipvs/ip_vs_est.c2
-rw-r--r--net/ipv4/ipvs/ip_vs_ftp.c2
-rw-r--r--net/ipv4/ipvs/ip_vs_lblc.c2
-rw-r--r--net/ipv4/ipvs/ip_vs_lblcr.c2
-rw-r--r--net/ipv4/ipvs/ip_vs_lc.c2
-rw-r--r--net/ipv4/ipvs/ip_vs_nq.c2
-rw-r--r--net/ipv4/ipvs/ip_vs_proto.c2
-rw-r--r--net/ipv4/ipvs/ip_vs_proto_ah.c2
-rw-r--r--net/ipv4/ipvs/ip_vs_proto_esp.c2
-rw-r--r--net/ipv4/ipvs/ip_vs_proto_tcp.c2
-rw-r--r--net/ipv4/ipvs/ip_vs_proto_udp.c2
-rw-r--r--net/ipv4/ipvs/ip_vs_rr.c2
-rw-r--r--net/ipv4/ipvs/ip_vs_sched.c2
-rw-r--r--net/ipv4/ipvs/ip_vs_sed.c2
-rw-r--r--net/ipv4/ipvs/ip_vs_sh.c2
-rw-r--r--net/ipv4/ipvs/ip_vs_sync.c433
-rw-r--r--net/ipv4/ipvs/ip_vs_wlc.c2
-rw-r--r--net/ipv4/ipvs/ip_vs_wrr.c2
-rw-r--r--net/ipv4/ipvs/ip_vs_xmit.c2
-rw-r--r--net/ipv4/netfilter/Kconfig15
-rw-r--r--net/ipv4/netfilter/Makefile1
-rw-r--r--net/ipv4/netfilter/ip_queue.c3
-rw-r--r--net/ipv4/netfilter/iptable_security.c180
-rw-r--r--net/ipv4/netfilter/nf_conntrack_proto_icmp.c5
-rw-r--r--net/ipv4/netfilter/nf_nat_proto_sctp.c4
-rw-r--r--net/ipv4/netfilter/nf_nat_snmp_basic.c2
-rw-r--r--net/ipv4/proc.c113
-rw-r--r--net/ipv4/protocol.c2
-rw-r--r--net/ipv4/raw.c12
-rw-r--r--net/ipv4/route.c262
-rw-r--r--net/ipv4/syncookies.c8
-rw-r--r--net/ipv4/sysctl_net_ipv4.c5
-rw-r--r--net/ipv4/tcp.c106
-rw-r--r--net/ipv4/tcp_diag.c2
-rw-r--r--net/ipv4/tcp_input.c236
-rw-r--r--net/ipv4/tcp_ipv4.c324
-rw-r--r--net/ipv4/tcp_minisocks.c12
-rw-r--r--net/ipv4/tcp_output.c466
-rw-r--r--net/ipv4/tcp_probe.c2
-rw-r--r--net/ipv4/tcp_timer.c27
-rw-r--r--net/ipv4/udp.c76
-rw-r--r--net/ipv4/udp_impl.h2
-rw-r--r--net/ipv4/udplite.c3
-rw-r--r--net/ipv6/addrconf.c91
-rw-r--r--net/ipv6/addrlabel.c106
-rw-r--r--net/ipv6/af_inet6.c17
-rw-r--r--net/ipv6/datagram.c2
-rw-r--r--net/ipv6/exthdrs.c4
-rw-r--r--net/ipv6/icmp.c5
-rw-r--r--net/ipv6/inet6_hashtables.c13
-rw-r--r--net/ipv6/ip6_fib.c2
-rw-r--r--net/ipv6/ip6_input.c5
-rw-r--r--net/ipv6/ip6_output.c15
-rw-r--r--net/ipv6/ip6_tunnel.c28
-rw-r--r--net/ipv6/ip6mr.c79
-rw-r--r--net/ipv6/ipv6_sockglue.c9
-rw-r--r--net/ipv6/mcast.c3
-rw-r--r--net/ipv6/netfilter/Kconfig12
-rw-r--r--net/ipv6/netfilter/Makefile1
-rw-r--r--net/ipv6/netfilter/ip6_queue.c3
-rw-r--r--net/ipv6/netfilter/ip6table_filter.c31
-rw-r--r--net/ipv6/netfilter/ip6table_security.c172
-rw-r--r--net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c5
-rw-r--r--net/ipv6/proc.c31
-rw-r--r--net/ipv6/protocol.c2
-rw-r--r--net/ipv6/raw.c10
-rw-r--r--net/ipv6/reassembly.c63
-rw-r--r--net/ipv6/route.c70
-rw-r--r--net/ipv6/sit.c26
-rw-r--r--net/ipv6/syncookies.c6
-rw-r--r--net/ipv6/sysctl_net_ipv6.c29
-rw-r--r--net/ipv6/tcp_ipv6.c269
-rw-r--r--net/ipv6/udp.c53
-rw-r--r--net/ipv6/udp_impl.h2
-rw-r--r--net/ipv6/udplite.c2
-rw-r--r--net/irda/irnet/irnet.h1
-rw-r--r--net/irda/irnet/irnet_ppp.c57
-rw-r--r--net/irda/irnet/irnet_ppp.h7
-rw-r--r--net/irda/irnetlink.c4
-rw-r--r--net/iucv/af_iucv.c9
-rw-r--r--net/iucv/iucv.c46
-rw-r--r--net/key/af_key.c622
-rw-r--r--net/llc/af_llc.c4
-rw-r--r--net/mac80211/Kconfig164
-rw-r--r--net/mac80211/Makefile20
-rw-r--r--net/mac80211/aes_ccm.c53
-rw-r--r--net/mac80211/aes_ccm.h6
-rw-r--r--net/mac80211/cfg.c42
-rw-r--r--net/mac80211/debugfs.c58
-rw-r--r--net/mac80211/debugfs_key.c8
-rw-r--r--net/mac80211/debugfs_netdev.c59
-rw-r--r--net/mac80211/debugfs_netdev.h5
-rw-r--r--net/mac80211/debugfs_sta.c56
-rw-r--r--net/mac80211/ieee80211_i.h179
-rw-r--r--net/mac80211/iface.c396
-rw-r--r--net/mac80211/key.c11
-rw-r--r--net/mac80211/key.h54
-rw-r--r--net/mac80211/main.c613
-rw-r--r--net/mac80211/mesh.c38
-rw-r--r--net/mac80211/mesh.h2
-rw-r--r--net/mac80211/mesh_hwmp.c2
-rw-r--r--net/mac80211/mesh_pathtbl.c54
-rw-r--r--net/mac80211/mesh_plink.c88
-rw-r--r--net/mac80211/michael.c116
-rw-r--r--net/mac80211/michael.h8
-rw-r--r--net/mac80211/mlme.c1048
-rw-r--r--net/mac80211/rate.c12
-rw-r--r--net/mac80211/rate.h37
-rw-r--r--net/mac80211/rc80211_pid.h9
-rw-r--r--net/mac80211/rc80211_pid_algo.c71
-rw-r--r--net/mac80211/rc80211_pid_debugfs.c8
-rw-r--r--net/mac80211/rx.c683
-rw-r--r--net/mac80211/sta_info.c55
-rw-r--r--net/mac80211/sta_info.h168
-rw-r--r--net/mac80211/tkip.c282
-rw-r--r--net/mac80211/tkip.h8
-rw-r--r--net/mac80211/tx.c1167
-rw-r--r--net/mac80211/util.c136
-rw-r--r--net/mac80211/wep.c71
-rw-r--r--net/mac80211/wep.h2
-rw-r--r--net/mac80211/wext.c145
-rw-r--r--net/mac80211/wme.c676
-rw-r--r--net/mac80211/wme.h43
-rw-r--r--net/mac80211/wpa.c403
-rw-r--r--net/netfilter/Kconfig5
-rw-r--r--net/netfilter/nf_conntrack_core.c29
-rw-r--r--net/netfilter/nf_conntrack_extend.c10
-rw-r--r--net/netfilter/nf_conntrack_helper.c1
-rw-r--r--net/netfilter/nf_conntrack_netlink.c33
-rw-r--r--net/netfilter/nf_conntrack_proto_dccp.c3
-rw-r--r--net/netfilter/nf_conntrack_proto_sctp.c80
-rw-r--r--net/netfilter/nf_conntrack_proto_tcp.c17
-rw-r--r--net/netfilter/nfnetlink_queue.c1
-rw-r--r--net/netfilter/xt_CONNSECMARK.c10
-rw-r--r--net/netfilter/xt_SECMARK.c10
-rw-r--r--net/netfilter/xt_string.c38
-rw-r--r--net/netlabel/netlabel_cipso_v4.c9
-rw-r--r--net/netlabel/netlabel_domainhash.c3
-rw-r--r--net/netlabel/netlabel_mgmt.c12
-rw-r--r--net/netlabel/netlabel_unlabeled.c6
-rw-r--r--net/netlink/af_netlink.c4
-rw-r--r--net/netrom/af_netrom.c22
-rw-r--r--net/packet/af_packet.c220
-rw-r--r--net/rfkill/rfkill-input.c98
-rw-r--r--net/rfkill/rfkill-input.h1
-rw-r--r--net/rfkill/rfkill.c314
-rw-r--r--net/rose/af_rose.c24
-rw-r--r--net/rose/rose_route.c29
-rw-r--r--net/rxrpc/ar-input.c5
-rw-r--r--net/sched/cls_api.c20
-rw-r--r--net/sched/cls_flow.c52
-rw-r--r--net/sched/cls_route.c12
-rw-r--r--net/sched/cls_u32.c18
-rw-r--r--net/sched/sch_api.c462
-rw-r--r--net/sched/sch_atm.c6
-rw-r--r--net/sched/sch_cbq.c136
-rw-r--r--net/sched/sch_dsmark.c6
-rw-r--r--net/sched/sch_fifo.c47
-rw-r--r--net/sched/sch_generic.c464
-rw-r--r--net/sched/sch_gred.c2
-rw-r--r--net/sched/sch_hfsc.c94
-rw-r--r--net/sched/sch_htb.c183
-rw-r--r--net/sched/sch_netem.c38
-rw-r--r--net/sched/sch_prio.c138
-rw-r--r--net/sched/sch_red.c33
-rw-r--r--net/sched/sch_sfq.c4
-rw-r--r--net/sched/sch_tbf.c33
-rw-r--r--net/sched/sch_teql.c44
-rw-r--r--net/sctp/Kconfig4
-rw-r--r--net/sctp/Makefile4
-rw-r--r--net/sctp/associola.c4
-rw-r--r--net/sctp/bind_addr.c37
-rw-r--r--net/sctp/input.c38
-rw-r--r--net/sctp/ipv6.c20
-rw-r--r--net/sctp/output.c14
-rw-r--r--net/sctp/outqueue.c34
-rw-r--r--net/sctp/proc.c141
-rw-r--r--net/sctp/protocol.c33
-rw-r--r--net/sctp/sm_make_chunk.c7
-rw-r--r--net/sctp/sm_sideeffect.c44
-rw-r--r--net/sctp/sm_statefuns.c25
-rw-r--r--net/sctp/socket.c383
-rw-r--r--net/sctp/transport.c3
-rw-r--r--net/sctp/ulpevent.c5
-rw-r--r--net/socket.c10
-rw-r--r--net/sunrpc/auth_gss/auth_gss.c29
-rw-r--r--net/sunrpc/auth_gss/gss_krb5_mech.c4
-rw-r--r--net/sunrpc/auth_gss/gss_spkm3_mech.c4
-rw-r--r--net/sunrpc/auth_gss/gss_spkm3_token.c2
-rw-r--r--net/sunrpc/auth_unix.c2
-rw-r--r--net/sunrpc/clnt.c161
-rw-r--r--net/sunrpc/rpcb_clnt.c377
-rw-r--r--net/sunrpc/sched.c23
-rw-r--r--net/sunrpc/xprt.c9
-rw-r--r--net/sunrpc/xprtsock.c2
-rw-r--r--net/sysctl_net.c31
-rw-r--r--net/tipc/bcast.c14
-rw-r--r--net/tipc/bearer.c8
-rw-r--r--net/tipc/cluster.c4
-rw-r--r--net/tipc/config.c11
-rw-r--r--net/tipc/core.c13
-rw-r--r--net/tipc/core.h126
-rw-r--r--net/tipc/dbg.c231
-rw-r--r--net/tipc/dbg.h12
-rw-r--r--net/tipc/discover.c14
-rw-r--r--net/tipc/discover.h2
-rw-r--r--net/tipc/eth_media.c6
-rw-r--r--net/tipc/link.c98
-rw-r--r--net/tipc/msg.c13
-rw-r--r--net/tipc/msg.h42
-rw-r--r--net/tipc/name_distr.c6
-rw-r--r--net/tipc/name_table.c55
-rw-r--r--net/tipc/net.c14
-rw-r--r--net/tipc/net.h2
-rw-r--r--net/tipc/netlink.c16
-rw-r--r--net/tipc/node.c55
-rw-r--r--net/tipc/port.c115
-rw-r--r--net/tipc/ref.c14
-rw-r--r--net/tipc/socket.c62
-rw-r--r--net/tipc/subscr.c249
-rw-r--r--net/tipc/subscr.h34
-rw-r--r--net/tipc/user_reg.c14
-rw-r--r--net/unix/af_unix.c2
-rw-r--r--net/wanrouter/Kconfig2
-rw-r--r--net/wanrouter/wanmain.c6
-rw-r--r--net/wanrouter/wanproc.c2
-rw-r--r--net/wireless/Kconfig11
-rw-r--r--net/wireless/core.c33
-rw-r--r--net/wireless/nl80211.c6
-rw-r--r--net/wireless/radiotap.c16
-rw-r--r--net/wireless/wext.c582
-rw-r--r--net/x25/af_x25.c9
-rw-r--r--net/xfrm/xfrm_user.c3
347 files changed, 11889 insertions, 9432 deletions
diff --git a/net/802/Kconfig b/net/802/Kconfig
new file mode 100644
index 000000000000..be33d27c8e69
--- /dev/null
+++ b/net/802/Kconfig
@@ -0,0 +1,7 @@
+# STP and GARP have no prompt text here, so they are switched on through
+# "select" from other options rather than by the user directly.
+# STP additionally selects LLC.
+config STP
+ tristate
+ select LLC
+
+# GARP additionally selects STP.
+config GARP
+ tristate
+ select STP
diff --git a/net/802/Makefile b/net/802/Makefile
index 68569ffddea1..7893d679910c 100644
--- a/net/802/Makefile
+++ b/net/802/Makefile
@@ -10,3 +10,5 @@ obj-$(CONFIG_FDDI) += fddi.o
obj-$(CONFIG_HIPPI) += hippi.o
obj-$(CONFIG_IPX) += p8022.o psnap.o p8023.o
obj-$(CONFIG_ATALK) += p8022.o psnap.o
+# STP SAP demultiplexer and the GARP protocol implementation
+obj-$(CONFIG_STP) += stp.o
+obj-$(CONFIG_GARP) += garp.o
diff --git a/net/802/garp.c b/net/802/garp.c
new file mode 100644
index 000000000000..1dcb0660c49d
--- /dev/null
+++ b/net/802/garp.c
@@ -0,0 +1,636 @@
+/*
+ * IEEE 802.1D Generic Attribute Registration Protocol (GARP)
+ *
+ * Copyright (c) 2008 Patrick McHardy <kaber@trash.net>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * version 2 as published by the Free Software Foundation.
+ */
+#include <linux/kernel.h>
+#include <linux/timer.h>
+#include <linux/skbuff.h>
+#include <linux/netdevice.h>
+#include <linux/etherdevice.h>
+#include <linux/rtnetlink.h>
+#include <linux/llc.h>
+#include <net/llc.h>
+#include <net/llc_pdu.h>
+#include <net/garp.h>
+#include <asm/unaligned.h>
+
+/* Join time in milliseconds. garp_join_timer_arm() converts it to jiffies and
+ * scales it by net_random() to obtain the delay of the next join timer run.
+ * Exported as a module parameter with mode 0644. */
+static unsigned int garp_join_time __read_mostly = 200;
+module_param(garp_join_time, uint, 0644);
+MODULE_PARM_DESC(garp_join_time, "Join time in ms (default 200ms)");
+MODULE_LICENSE("GPL");
+
+/*
+ * Applicant state machine, indexed as [current state][event].
+ *
+ * Each entry names the state to move to and, optionally, an action to run
+ * during the transition; entries that do not set .action leave it
+ * zero-initialized. garp_attr_event() ignores the event entirely when the
+ * target state is GARP_APPLICANT_INVALID.
+ */
+static const struct garp_state_trans {
+ u8 state;
+ u8 action;
+} garp_applicant_state_table[GARP_APPLICANT_MAX + 1][GARP_EVENT_MAX + 1] = {
+ [GARP_APPLICANT_VA] = {
+ [GARP_EVENT_TRANSMIT_PDU] = { .state = GARP_APPLICANT_AA,
+ .action = GARP_ACTION_S_JOIN_IN },
+ [GARP_EVENT_R_JOIN_IN] = { .state = GARP_APPLICANT_AA },
+ [GARP_EVENT_R_JOIN_EMPTY] = { .state = GARP_APPLICANT_VA },
+ [GARP_EVENT_R_EMPTY] = { .state = GARP_APPLICANT_VA },
+ [GARP_EVENT_R_LEAVE_IN] = { .state = GARP_APPLICANT_VA },
+ [GARP_EVENT_R_LEAVE_EMPTY] = { .state = GARP_APPLICANT_VP },
+ [GARP_EVENT_REQ_JOIN] = { .state = GARP_APPLICANT_INVALID },
+ [GARP_EVENT_REQ_LEAVE] = { .state = GARP_APPLICANT_LA },
+ },
+ [GARP_APPLICANT_AA] = {
+ [GARP_EVENT_TRANSMIT_PDU] = { .state = GARP_APPLICANT_QA,
+ .action = GARP_ACTION_S_JOIN_IN },
+ [GARP_EVENT_R_JOIN_IN] = { .state = GARP_APPLICANT_QA },
+ [GARP_EVENT_R_JOIN_EMPTY] = { .state = GARP_APPLICANT_VA },
+ [GARP_EVENT_R_EMPTY] = { .state = GARP_APPLICANT_VA },
+ [GARP_EVENT_R_LEAVE_IN] = { .state = GARP_APPLICANT_VA },
+ [GARP_EVENT_R_LEAVE_EMPTY] = { .state = GARP_APPLICANT_VP },
+ [GARP_EVENT_REQ_JOIN] = { .state = GARP_APPLICANT_INVALID },
+ [GARP_EVENT_REQ_LEAVE] = { .state = GARP_APPLICANT_LA },
+ },
+ [GARP_APPLICANT_QA] = {
+ [GARP_EVENT_TRANSMIT_PDU] = { .state = GARP_APPLICANT_INVALID },
+ [GARP_EVENT_R_JOIN_IN] = { .state = GARP_APPLICANT_QA },
+ [GARP_EVENT_R_JOIN_EMPTY] = { .state = GARP_APPLICANT_VA },
+ [GARP_EVENT_R_EMPTY] = { .state = GARP_APPLICANT_VA },
+ [GARP_EVENT_R_LEAVE_IN] = { .state = GARP_APPLICANT_VP },
+ [GARP_EVENT_R_LEAVE_EMPTY] = { .state = GARP_APPLICANT_VP },
+ [GARP_EVENT_REQ_JOIN] = { .state = GARP_APPLICANT_INVALID },
+ [GARP_EVENT_REQ_LEAVE] = { .state = GARP_APPLICANT_LA },
+ },
+ [GARP_APPLICANT_LA] = {
+ [GARP_EVENT_TRANSMIT_PDU] = { .state = GARP_APPLICANT_VO,
+ .action = GARP_ACTION_S_LEAVE_EMPTY },
+ [GARP_EVENT_R_JOIN_IN] = { .state = GARP_APPLICANT_LA },
+ [GARP_EVENT_R_JOIN_EMPTY] = { .state = GARP_APPLICANT_VO },
+ [GARP_EVENT_R_EMPTY] = { .state = GARP_APPLICANT_LA },
+ [GARP_EVENT_R_LEAVE_IN] = { .state = GARP_APPLICANT_LA },
+ [GARP_EVENT_R_LEAVE_EMPTY] = { .state = GARP_APPLICANT_VO },
+ [GARP_EVENT_REQ_JOIN] = { .state = GARP_APPLICANT_VA },
+ [GARP_EVENT_REQ_LEAVE] = { .state = GARP_APPLICANT_INVALID },
+ },
+ [GARP_APPLICANT_VP] = {
+ [GARP_EVENT_TRANSMIT_PDU] = { .state = GARP_APPLICANT_AA,
+ .action = GARP_ACTION_S_JOIN_IN },
+ [GARP_EVENT_R_JOIN_IN] = { .state = GARP_APPLICANT_AP },
+ [GARP_EVENT_R_JOIN_EMPTY] = { .state = GARP_APPLICANT_VP },
+ [GARP_EVENT_R_EMPTY] = { .state = GARP_APPLICANT_VP },
+ [GARP_EVENT_R_LEAVE_IN] = { .state = GARP_APPLICANT_VP },
+ [GARP_EVENT_R_LEAVE_EMPTY] = { .state = GARP_APPLICANT_VP },
+ [GARP_EVENT_REQ_JOIN] = { .state = GARP_APPLICANT_INVALID },
+ [GARP_EVENT_REQ_LEAVE] = { .state = GARP_APPLICANT_VO },
+ },
+ [GARP_APPLICANT_AP] = {
+ [GARP_EVENT_TRANSMIT_PDU] = { .state = GARP_APPLICANT_QA,
+ .action = GARP_ACTION_S_JOIN_IN },
+ [GARP_EVENT_R_JOIN_IN] = { .state = GARP_APPLICANT_QP },
+ [GARP_EVENT_R_JOIN_EMPTY] = { .state = GARP_APPLICANT_VP },
+ [GARP_EVENT_R_EMPTY] = { .state = GARP_APPLICANT_VP },
+ [GARP_EVENT_R_LEAVE_IN] = { .state = GARP_APPLICANT_VP },
+ [GARP_EVENT_R_LEAVE_EMPTY] = { .state = GARP_APPLICANT_VP },
+ [GARP_EVENT_REQ_JOIN] = { .state = GARP_APPLICANT_INVALID },
+ [GARP_EVENT_REQ_LEAVE] = { .state = GARP_APPLICANT_AO },
+ },
+ [GARP_APPLICANT_QP] = {
+ [GARP_EVENT_TRANSMIT_PDU] = { .state = GARP_APPLICANT_INVALID },
+ [GARP_EVENT_R_JOIN_IN] = { .state = GARP_APPLICANT_QP },
+ [GARP_EVENT_R_JOIN_EMPTY] = { .state = GARP_APPLICANT_VP },
+ [GARP_EVENT_R_EMPTY] = { .state = GARP_APPLICANT_VP },
+ [GARP_EVENT_R_LEAVE_IN] = { .state = GARP_APPLICANT_VP },
+ [GARP_EVENT_R_LEAVE_EMPTY] = { .state = GARP_APPLICANT_VP },
+ [GARP_EVENT_REQ_JOIN] = { .state = GARP_APPLICANT_INVALID },
+ [GARP_EVENT_REQ_LEAVE] = { .state = GARP_APPLICANT_QO },
+ },
+ [GARP_APPLICANT_VO] = {
+ [GARP_EVENT_TRANSMIT_PDU] = { .state = GARP_APPLICANT_INVALID },
+ [GARP_EVENT_R_JOIN_IN] = { .state = GARP_APPLICANT_AO },
+ [GARP_EVENT_R_JOIN_EMPTY] = { .state = GARP_APPLICANT_VO },
+ [GARP_EVENT_R_EMPTY] = { .state = GARP_APPLICANT_VO },
+ [GARP_EVENT_R_LEAVE_IN] = { .state = GARP_APPLICANT_VO },
+ [GARP_EVENT_R_LEAVE_EMPTY] = { .state = GARP_APPLICANT_VO },
+ [GARP_EVENT_REQ_JOIN] = { .state = GARP_APPLICANT_VP },
+ [GARP_EVENT_REQ_LEAVE] = { .state = GARP_APPLICANT_INVALID },
+ },
+ [GARP_APPLICANT_AO] = {
+ [GARP_EVENT_TRANSMIT_PDU] = { .state = GARP_APPLICANT_INVALID },
+ [GARP_EVENT_R_JOIN_IN] = { .state = GARP_APPLICANT_QO },
+ [GARP_EVENT_R_JOIN_EMPTY] = { .state = GARP_APPLICANT_VO },
+ [GARP_EVENT_R_EMPTY] = { .state = GARP_APPLICANT_VO },
+ [GARP_EVENT_R_LEAVE_IN] = { .state = GARP_APPLICANT_VO },
+ [GARP_EVENT_R_LEAVE_EMPTY] = { .state = GARP_APPLICANT_VO },
+ [GARP_EVENT_REQ_JOIN] = { .state = GARP_APPLICANT_AP },
+ [GARP_EVENT_REQ_LEAVE] = { .state = GARP_APPLICANT_INVALID },
+ },
+ [GARP_APPLICANT_QO] = {
+ [GARP_EVENT_TRANSMIT_PDU] = { .state = GARP_APPLICANT_INVALID },
+ [GARP_EVENT_R_JOIN_IN] = { .state = GARP_APPLICANT_QO },
+ [GARP_EVENT_R_JOIN_EMPTY] = { .state = GARP_APPLICANT_VO },
+ [GARP_EVENT_R_EMPTY] = { .state = GARP_APPLICANT_VO },
+ [GARP_EVENT_R_LEAVE_IN] = { .state = GARP_APPLICANT_VO },
+ [GARP_EVENT_R_LEAVE_EMPTY] = { .state = GARP_APPLICANT_VO },
+ [GARP_EVENT_REQ_JOIN] = { .state = GARP_APPLICANT_QP },
+ [GARP_EVENT_REQ_LEAVE] = { .state = GARP_APPLICANT_INVALID },
+ },
+};
+
+/*
+ * Three-way comparison of a stored attribute against a (type, length, value)
+ * key. The type is compared first, then the data length, then the data bytes.
+ * Returns zero on an exact match, otherwise a non-zero value whose sign gives
+ * the ordering.
+ */
+static int garp_attr_cmp(const struct garp_attr *attr,
+			 const void *data, u8 len, u8 type)
+{
+	int diff;
+
+	diff = attr->type - type;
+	if (diff != 0)
+		return diff;
+
+	diff = attr->dlen - len;
+	if (diff != 0)
+		return diff;
+
+	return memcmp(attr->data, data, len);
+}
+
+/*
+ * Search the applicant's attribute tree for the attribute matching the given
+ * (type, length, value) key. Returns the attribute, or NULL if none matches.
+ */
+static struct garp_attr *garp_attr_lookup(const struct garp_applicant *app,
+					  const void *data, u8 len, u8 type)
+{
+	struct rb_node *n;
+	struct garp_attr *cur;
+	int cmp;
+
+	for (n = app->gid.rb_node; n != NULL; ) {
+		cur = rb_entry(n, struct garp_attr, node);
+		cmp = garp_attr_cmp(cur, data, len, type);
+		if (cmp == 0)
+			return cur;
+		/* Same descent direction as used by garp_attr_insert(). */
+		n = cmp < 0 ? n->rb_left : n->rb_right;
+	}
+	return NULL;
+}
+
+/*
+ * Link @new into the applicant's attribute tree, ordered by garp_attr_cmp().
+ *
+ * The caller must guarantee that no attribute with an equal (type, length,
+ * value) key is already in the tree: for an equal key neither branch below
+ * advances @p, so the walk would never terminate. garp_attr_create() ensures
+ * this by looking the key up first.
+ */
+static void garp_attr_insert(struct garp_applicant *app, struct garp_attr *new)
+{
+	struct rb_node *parent = NULL, **p = &app->gid.rb_node;
+	struct garp_attr *attr;
+	int d;
+
+	while (*p) {
+		parent = *p;
+		attr = rb_entry(parent, struct garp_attr, node);
+		d = garp_attr_cmp(attr, new->data, new->dlen, new->type);
+		if (d < 0)
+			p = &parent->rb_left;
+		else if (d > 0)
+			p = &parent->rb_right;
+	}
+	rb_link_node(&new->node, parent, p);
+	rb_insert_color(&new->node, &app->gid);
+}
+
+/*
+ * Return the attribute for the given (type, length, value) key, creating it
+ * in state GARP_APPLICANT_VO and inserting it into the applicant's tree when
+ * it does not exist yet.
+ *
+ * An attribute that is already present is returned unchanged instead of
+ * being inserted a second time; inserting a duplicate key would make
+ * garp_attr_insert() loop forever.
+ *
+ * Returns NULL if the allocation fails.
+ */
+static struct garp_attr *garp_attr_create(struct garp_applicant *app,
+					  const void *data, u8 len, u8 type)
+{
+	struct garp_attr *attr;
+
+	/* Re-use an existing attribute rather than inserting a duplicate. */
+	attr = garp_attr_lookup(app, data, len, type);
+	if (attr)
+		return attr;
+
+	attr = kmalloc(sizeof(*attr) + len, GFP_ATOMIC);
+	if (!attr)
+		return attr;
+	attr->state = GARP_APPLICANT_VO;
+	attr->type = type;
+	attr->dlen = len;
+	memcpy(attr->data, data, len);
+	garp_attr_insert(app, attr);
+	return attr;
+}
+
+/* Unlink @attr from the applicant's attribute tree and free it. */
+static void garp_attr_destroy(struct garp_applicant *app, struct garp_attr *attr)
+{
+	struct rb_root *root = &app->gid;
+
+	rb_erase(&attr->node, root);
+	kfree(attr);
+}
+
+/*
+ * Allocate a fresh PDU for the applicant and write the GARP PDU header.
+ *
+ * Headroom is reserved for the link layer header plus an LLC UI header,
+ * which garp_pdu_queue() fills in later. On success the skb is stored in
+ * app->pdu and 0 is returned; -ENOMEM is returned if the allocation fails.
+ */
+static int garp_pdu_init(struct garp_applicant *app)
+{
+	struct garp_pdu_hdr *hdr;
+	struct sk_buff *pdu;
+
+#define LLC_RESERVE sizeof(struct llc_pdu_un)
+	pdu = alloc_skb(app->dev->mtu + LL_RESERVED_SPACE(app->dev),
+			GFP_ATOMIC);
+	if (pdu == NULL)
+		return -ENOMEM;
+
+	pdu->protocol = htons(ETH_P_802_2);
+	pdu->dev = app->dev;
+	skb_reserve(pdu, LL_RESERVED_SPACE(app->dev) + LLC_RESERVE);
+
+	/* The protocol field may be unaligned within the buffer. */
+	hdr = (struct garp_pdu_hdr *)__skb_put(pdu, sizeof(*hdr));
+	put_unaligned(htons(GARP_PROTOCOL_ID), &hdr->protocol);
+
+	app->pdu = pdu;
+	return 0;
+}
+
+/*
+ * Append a single end mark byte to the PDU under construction.
+ * Returns 0 on success or -1 if the PDU has no tailroom left.
+ */
+static int garp_pdu_append_end_mark(struct garp_applicant *app)
+{
+	u8 *mark;
+
+	if (skb_tailroom(app->pdu) < sizeof(u8))
+		return -1;
+
+	mark = (u8 *)__skb_put(app->pdu, sizeof(u8));
+	*mark = GARP_END_MARK;
+	return 0;
+}
+
+/*
+ * Finish the PDU under construction, if there is one, and move it to the
+ * applicant's transmit queue: two end marks are appended (failures to append
+ * are ignored), the LLC UI command header and the MAC header addressed to the
+ * application's group address are prepended, and app->pdu is cleared.
+ */
+static void garp_pdu_queue(struct garp_applicant *app)
+{
+	struct sk_buff *skb = app->pdu;
+
+	if (skb == NULL)
+		return;
+
+	garp_pdu_append_end_mark(app);
+	garp_pdu_append_end_mark(app);
+
+	llc_pdu_header_init(skb, LLC_PDU_TYPE_U, LLC_SAP_BSPAN,
+			    LLC_SAP_BSPAN, LLC_PDU_CMD);
+	llc_pdu_init_as_ui_cmd(skb);
+	llc_mac_hdr_init(skb, app->dev->dev_addr,
+			 app->app->proto.group_address);
+
+	skb_queue_tail(&app->queue, skb);
+	app->pdu = NULL;
+}
+
+/* Hand every PDU waiting on the applicant's queue to dev_queue_xmit(). */
+static void garp_queue_xmit(struct garp_applicant *app)
+{
+	struct sk_buff *pdu;
+
+	for (;;) {
+		pdu = skb_dequeue(&app->queue);
+		if (pdu == NULL)
+			break;
+		dev_queue_xmit(pdu);
+	}
+}
+
+/*
+ * Start a new message for @attrtype in the PDU under construction and record
+ * the type in the skb control block as the current message type.
+ * Returns 0 on success or -1 if the message header does not fit.
+ */
+static int garp_pdu_append_msg(struct garp_applicant *app, u8 attrtype)
+{
+	struct sk_buff *pdu = app->pdu;
+	struct garp_msg_hdr *hdr;
+
+	if (skb_tailroom(pdu) < sizeof(*hdr))
+		return -1;
+
+	hdr = (struct garp_msg_hdr *)__skb_put(pdu, sizeof(*hdr));
+	hdr->attrtype = attrtype;
+	garp_cb(pdu)->cur_type = attrtype;
+	return 0;
+}
+
+/*
+ * Append @attr with the given attribute @event to the PDU under construction.
+ *
+ * A PDU is allocated when none is pending. When the attribute's type differs
+ * from the current message type, the current message (if any) is closed with
+ * an end mark and a new message is started. Whenever something does not fit,
+ * the PDU is queued and the whole procedure is retried with a new one.
+ *
+ * Returns 0 on success or the error from garp_pdu_init().
+ */
+static int garp_pdu_append_attr(struct garp_applicant *app,
+				const struct garp_attr *attr,
+				enum garp_attr_event event)
+{
+	struct garp_attr_hdr *hdr;
+	unsigned int total;
+	int err;
+
+	for (;;) {
+		if (app->pdu == NULL) {
+			err = garp_pdu_init(app);
+			if (err < 0)
+				return err;
+		}
+
+		if (garp_cb(app->pdu)->cur_type != attr->type) {
+			/* Close the previous message, then open a new one. */
+			if ((garp_cb(app->pdu)->cur_type &&
+			     garp_pdu_append_end_mark(app) < 0) ||
+			    garp_pdu_append_msg(app, attr->type) < 0) {
+				garp_pdu_queue(app);
+				continue;
+			}
+		}
+
+		total = sizeof(*hdr) + attr->dlen;
+		if (skb_tailroom(app->pdu) >= total)
+			break;
+
+		/* No room left: flush this PDU and start over. */
+		garp_pdu_queue(app);
+	}
+
+	hdr = (struct garp_attr_hdr *)__skb_put(app->pdu, total);
+	hdr->len = total;
+	hdr->event = event;
+	memcpy(hdr->data, attr->data, attr->dlen);
+	return 0;
+}
+
+/*
+ * Feed @event into the state machine of @attr.
+ *
+ * The transition is looked up in garp_applicant_state_table. Events whose
+ * target state is GARP_APPLICANT_INVALID are ignored. Note that @attr may be
+ * freed by this function (leave action), so callers must not touch it
+ * afterwards.
+ */
+static void garp_attr_event(struct garp_applicant *app,
+			    struct garp_attr *attr, enum garp_event event)
+{
+	const struct garp_state_trans *trans;
+
+	trans = &garp_applicant_state_table[attr->state][event];
+	if (trans->state == GARP_APPLICANT_INVALID)
+		return;
+
+	switch (trans->action) {
+	case GARP_ACTION_S_LEAVE_EMPTY:
+		garp_pdu_append_attr(app, attr, GARP_LEAVE_EMPTY);
+		/* A pure applicant is done with the attribute once the leave
+		 * message has been sent, so it is destroyed here. */
+		garp_attr_destroy(app, attr);
+		return;
+	case GARP_ACTION_S_JOIN_IN:
+		/* Keep the old state if the attribute could not be appended,
+		 * so the next TRANSMIT_PDU event tries again. */
+		if (garp_pdu_append_attr(app, attr, GARP_JOIN_IN) < 0)
+			return;
+		break;
+	case GARP_ACTION_NONE:
+		break;
+	default:
+		WARN_ON(1);
+	}
+
+	attr->state = trans->state;
+}
+
+/*
+ * Request registration of the attribute (type, data, len) for application
+ * @appl on @dev: the attribute is created and a REQ_JOIN event is delivered
+ * to it, all under the applicant lock with bottom halves disabled.
+ *
+ * Returns 0 on success or -ENOMEM if the attribute could not be created.
+ */
+int garp_request_join(const struct net_device *dev,
+		      const struct garp_application *appl,
+		      const void *data, u8 len, u8 type)
+{
+	struct garp_port *port = dev->garp_port;
+	struct garp_applicant *app = port->applicants[appl->type];
+	struct garp_attr *attr;
+	int err = -ENOMEM;
+
+	spin_lock_bh(&app->lock);
+	attr = garp_attr_create(app, data, len, type);
+	if (attr != NULL) {
+		garp_attr_event(app, attr, GARP_EVENT_REQ_JOIN);
+		err = 0;
+	}
+	spin_unlock_bh(&app->lock);
+
+	return err;
+}
+EXPORT_SYMBOL_GPL(garp_request_join);
+
+/*
+ * Request deregistration of the attribute (type, data, len) for application
+ * @appl on @dev by delivering a REQ_LEAVE event to it. Nothing happens when
+ * the attribute is not known. Runs under the applicant lock with bottom
+ * halves disabled.
+ */
+void garp_request_leave(const struct net_device *dev,
+			const struct garp_application *appl,
+			const void *data, u8 len, u8 type)
+{
+	struct garp_port *port = dev->garp_port;
+	struct garp_applicant *app = port->applicants[appl->type];
+	struct garp_attr *attr;
+
+	spin_lock_bh(&app->lock);
+	attr = garp_attr_lookup(app, data, len, type);
+	if (attr != NULL)
+		garp_attr_event(app, attr, GARP_EVENT_REQ_LEAVE);
+	spin_unlock_bh(&app->lock);
+}
+EXPORT_SYMBOL_GPL(garp_request_leave);
+
+/*
+ * Deliver @event to every attribute of the applicant, in tree order.
+ * The successor is fetched before the event is delivered because
+ * garp_attr_event() may destroy the current attribute.
+ */
+static void garp_gid_event(struct garp_applicant *app, enum garp_event event)
+{
+	struct rb_node *cur, *succ;
+	struct garp_attr *attr;
+
+	cur = rb_first(&app->gid);
+	while (cur != NULL) {
+		succ = rb_next(cur);
+		attr = rb_entry(cur, struct garp_attr, node);
+		garp_attr_event(app, attr, event);
+		cur = succ;
+	}
+}
+
+/*
+ * (Re)arm the join timer with a randomized delay: the join time in jiffies is
+ * multiplied by net_random() in 64-bit arithmetic and the product is shifted
+ * right by 32 bits.
+ */
+static void garp_join_timer_arm(struct garp_applicant *app)
+{
+	u64 scaled;
+	unsigned long delay;
+
+	scaled = (u64)msecs_to_jiffies(garp_join_time) * net_random();
+	delay = scaled >> 32;
+	mod_timer(&app->join_timer, jiffies + delay);
+}
+
+/*
+ * Join timer handler; @data carries the applicant pointer.
+ *
+ * Under the applicant lock a TRANSMIT_PDU event is delivered to all
+ * attributes and the resulting PDU is queued. The queue is transmitted after
+ * the lock has been dropped, and the timer is then re-armed.
+ */
+static void garp_join_timer(unsigned long data)
+{
+	struct garp_applicant *applicant = (struct garp_applicant *)data;
+
+	spin_lock(&applicant->lock);
+	garp_gid_event(applicant, GARP_EVENT_TRANSMIT_PDU);
+	garp_pdu_queue(applicant);
+	spin_unlock(&applicant->lock);
+
+	garp_queue_xmit(applicant);
+	garp_join_timer_arm(applicant);
+}
+
+/*
+ * Check whether the next byte of @skb is an end mark.
+ *
+ * Returns -1 when no more data can be pulled or when an end mark was found
+ * (the mark is consumed in that case); returns 0 when more list entries
+ * follow.
+ */
+static int garp_pdu_parse_end_mark(struct sk_buff *skb)
+{
+	if (!pskb_may_pull(skb, sizeof(u8)))
+		return -1;
+
+	if (*skb->data != GARP_END_MARK)
+		return 0;
+
+	skb_pull(skb, sizeof(u8));
+	return -1;
+}
+
+/*
+ * Parse one attribute of a received message and feed the corresponding event
+ * into the applicant's state machine.
+ *
+ * The attribute length field covers the attribute header plus the value (see
+ * garp_pdu_append_attr()), so the value length is the length field minus the
+ * header size.
+ *
+ * Returns 0 when parsing may continue with the next attribute (this includes
+ * attribute types above the application's maximum, unknown events and
+ * attributes that are not locally registered) and -1 on a malformed
+ * attribute.
+ */
+static int garp_pdu_parse_attr(struct garp_applicant *app, struct sk_buff *skb,
+			       u8 attrtype)
+{
+	const struct garp_attr_hdr *ga;
+	struct garp_attr *attr;
+	enum garp_event event;
+	unsigned int len, dlen;
+
+	if (!pskb_may_pull(skb, sizeof(*ga)))
+		return -1;
+	ga = (struct garp_attr_hdr *)skb->data;
+	len = ga->len;
+	if (len < sizeof(*ga))
+		return -1;
+
+	if (!pskb_may_pull(skb, len))
+		return -1;
+	/* pskb_may_pull() may have moved the header; reload the pointer. */
+	ga = (struct garp_attr_hdr *)skb->data;
+	skb_pull(skb, len);
+	/* Value length = total attribute length minus the header. */
+	dlen = len - sizeof(*ga);
+
+	if (attrtype > app->app->maxattr)
+		return 0;
+
+	switch (ga->event) {
+	case GARP_LEAVE_ALL:
+		/* LeaveAll carries no value and applies to all attributes. */
+		if (dlen != 0)
+			return -1;
+		garp_gid_event(app, GARP_EVENT_R_LEAVE_EMPTY);
+		return 0;
+	case GARP_JOIN_EMPTY:
+		event = GARP_EVENT_R_JOIN_EMPTY;
+		break;
+	case GARP_JOIN_IN:
+		event = GARP_EVENT_R_JOIN_IN;
+		break;
+	case GARP_LEAVE_EMPTY:
+		event = GARP_EVENT_R_LEAVE_EMPTY;
+		break;
+	case GARP_EMPTY:
+		event = GARP_EVENT_R_EMPTY;
+		break;
+	default:
+		return 0;
+	}
+
+	if (dlen == 0)
+		return -1;
+	attr = garp_attr_lookup(app, ga->data, dlen, attrtype);
+	if (attr == NULL)
+		return 0;
+	garp_attr_event(app, attr, event);
+	return 0;
+}
+
+/*
+ * Parse one message of a received PDU: a message header carrying the
+ * attribute type, followed by attributes up to an end mark or the end of the
+ * data.
+ *
+ * Returns 0 on success and -1 if the header cannot be pulled, the attribute
+ * type is 0, or an attribute is malformed.
+ */
+static int garp_pdu_parse_msg(struct garp_applicant *app, struct sk_buff *skb)
+{
+	const struct garp_msg_hdr *gm;
+	u8 attrtype;
+
+	if (!pskb_may_pull(skb, sizeof(*gm)))
+		return -1;
+	gm = (struct garp_msg_hdr *)skb->data;
+	/* Copy the type now: the pskb_may_pull() calls made while parsing the
+	 * attributes may reallocate the header and leave @gm dangling. */
+	attrtype = gm->attrtype;
+	if (attrtype == 0)
+		return -1;
+	skb_pull(skb, sizeof(*gm));
+
+	while (skb->len > 0) {
+		if (garp_pdu_parse_attr(app, skb, attrtype) < 0)
+			return -1;
+		if (garp_pdu_parse_end_mark(skb) < 0)
+			break;
+	}
+	return 0;
+}
+
+/*
+ * Receive handler installed as proto.rcv by garp_register_application().
+ *
+ * Looks up the port and the applicant for the receiving device, verifies the
+ * GARP protocol ID and then parses message after message under the applicant
+ * lock until the data is exhausted, a message is malformed or an end mark is
+ * found. The skb is always freed.
+ */
+static void garp_pdu_rcv(const struct stp_proto *proto, struct sk_buff *skb,
+			 struct net_device *dev)
+{
+	struct garp_application *appl = proto->data;
+	const struct garp_pdu_hdr *hdr;
+	struct garp_applicant *app;
+	struct garp_port *port;
+
+	port = rcu_dereference(dev->garp_port);
+	if (port == NULL)
+		goto drop;
+	app = rcu_dereference(port->applicants[appl->type]);
+	if (app == NULL)
+		goto drop;
+
+	if (!pskb_may_pull(skb, sizeof(*hdr)))
+		goto drop;
+	hdr = (struct garp_pdu_hdr *)skb->data;
+	if (get_unaligned(&hdr->protocol) != htons(GARP_PROTOCOL_ID))
+		goto drop;
+	skb_pull(skb, sizeof(*hdr));
+
+	spin_lock(&app->lock);
+	while (skb->len > 0 &&
+	       garp_pdu_parse_msg(app, skb) >= 0 &&
+	       garp_pdu_parse_end_mark(skb) >= 0)
+		;
+	spin_unlock(&app->lock);
+drop:
+	kfree_skb(skb);
+}
+
+/*
+ * Allocate a zeroed GARP port structure and publish it in dev->garp_port.
+ * Returns 0 on success or -ENOMEM.
+ */
+static int garp_init_port(struct net_device *dev)
+{
+	struct garp_port *new_port = kzalloc(sizeof(*new_port), GFP_KERNEL);
+
+	if (new_port == NULL)
+		return -ENOMEM;
+
+	rcu_assign_pointer(dev->garp_port, new_port);
+	return 0;
+}
+
+/*
+ * Free the device's GARP port structure unless an applicant is still
+ * attached to it. The pointer is cleared first and the memory is released
+ * only after synchronize_rcu() has returned.
+ */
+static void garp_release_port(struct net_device *dev)
+{
+	struct garp_port *port = dev->garp_port;
+	unsigned int idx = 0;
+
+	/* Stop at the first slot that is still in use. */
+	while (idx <= GARP_APPLICATION_MAX && !port->applicants[idx])
+		idx++;
+	if (idx <= GARP_APPLICATION_MAX)
+		return;
+
+	rcu_assign_pointer(dev->garp_port, NULL);
+	synchronize_rcu();
+	kfree(port);
+}
+
+/*
+ * Attach an applicant for application @appl to @dev.
+ *
+ * Must be called with the RTNL held (ASSERT_RTNL). Creates the device's GARP
+ * port structure if it does not exist yet, allocates the applicant, adds the
+ * application's group address to the device's multicast list, publishes the
+ * applicant in the port and starts the join timer.
+ *
+ * Returns 0 on success or a negative error from garp_init_port(), the
+ * applicant allocation (-ENOMEM) or dev_mc_add().
+ */
+int garp_init_applicant(struct net_device *dev, struct garp_application *appl)
+{
+ struct garp_applicant *app;
+ int err;
+
+ ASSERT_RTNL();
+
+ if (!dev->garp_port) {
+ err = garp_init_port(dev);
+ if (err < 0)
+ goto err1;
+ }
+
+ err = -ENOMEM;
+ app = kzalloc(sizeof(*app), GFP_KERNEL);
+ if (!app)
+ goto err2;
+
+ err = dev_mc_add(dev, appl->proto.group_address, ETH_ALEN, 0);
+ if (err < 0)
+ goto err3;
+
+ app->dev = dev;
+ app->app = appl;
+ app->gid = RB_ROOT;
+ spin_lock_init(&app->lock);
+ skb_queue_head_init(&app->queue);
+ /* Publish only after the lock, tree and queue are initialized. */
+ rcu_assign_pointer(dev->garp_port->applicants[appl->type], app);
+ setup_timer(&app->join_timer, garp_join_timer, (unsigned long)app);
+ garp_join_timer_arm(app);
+ return 0;
+
+err3:
+ kfree(app);
+err2:
+ /* Frees the port only if no applicant is attached to it. */
+ garp_release_port(dev);
+err1:
+ return err;
+}
+EXPORT_SYMBOL_GPL(garp_init_applicant);
+
+/*
+ * Detach the applicant of application @appl from @dev and free it.
+ *
+ * Must be called with the RTNL held (ASSERT_RTNL). The applicant is first
+ * unpublished from the port, followed by synchronize_rcu(). Afterwards the
+ * join timer is stopped, pending messages are flushed, the group address is
+ * removed from the device's multicast list, the applicant is freed and the
+ * port is released if this was its last applicant.
+ */
+void garp_uninit_applicant(struct net_device *dev, struct garp_application *appl)
+{
+ struct garp_port *port = dev->garp_port;
+ struct garp_applicant *app = port->applicants[appl->type];
+
+ ASSERT_RTNL();
+
+ rcu_assign_pointer(port->applicants[appl->type], NULL);
+ synchronize_rcu();
+
+ /* Delete timer and generate a final TRANSMIT_PDU event to flush out
+ * all pending messages before the applicant is gone. */
+ del_timer_sync(&app->join_timer);
+ garp_gid_event(app, GARP_EVENT_TRANSMIT_PDU);
+ garp_pdu_queue(app);
+ garp_queue_xmit(app);
+
+ dev_mc_delete(dev, appl->proto.group_address, ETH_ALEN, 0);
+ kfree(app);
+ garp_release_port(dev);
+}
+EXPORT_SYMBOL_GPL(garp_uninit_applicant);
+
+/*
+ * Register a GARP application with the STP SAP demultiplexer. The embedded
+ * protocol descriptor is pointed back at @appl and uses garp_pdu_rcv() as its
+ * receive handler. Returns the result of stp_proto_register().
+ */
+int garp_register_application(struct garp_application *appl)
+{
+	int err;
+
+	appl->proto.data = appl;
+	appl->proto.rcv = garp_pdu_rcv;
+
+	err = stp_proto_register(&appl->proto);
+	return err;
+}
+EXPORT_SYMBOL_GPL(garp_register_application);
+
+/* Counterpart of garp_register_application(): unregister the application's
+ * protocol descriptor from the STP SAP demultiplexer. */
+void garp_unregister_application(struct garp_application *appl)
+{
+	stp_proto_unregister(&appl->proto);
+}
+EXPORT_SYMBOL_GPL(garp_unregister_application);
diff --git a/net/802/psnap.c b/net/802/psnap.c
index 31128cb92a23..ea4643931446 100644
--- a/net/802/psnap.c
+++ b/net/802/psnap.c
@@ -20,6 +20,7 @@
#include <linux/mm.h>
#include <linux/in.h>
#include <linux/init.h>
+#include <linux/rculist.h>
static LIST_HEAD(snap_list);
static DEFINE_SPINLOCK(snap_lock);
diff --git a/net/802/stp.c b/net/802/stp.c
new file mode 100644
index 000000000000..0b7a24452d11
--- /dev/null
+++ b/net/802/stp.c
@@ -0,0 +1,102 @@
+/*
+ * STP SAP demux
+ *
+ * Copyright (c) 2008 Patrick McHardy <kaber@trash.net>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * version 2 as published by the Free Software Foundation.
+ */
+#include <linux/mutex.h>
+#include <linux/skbuff.h>
+#include <linux/etherdevice.h>
+#include <linux/llc.h>
+#include <net/llc.h>
+#include <net/llc_pdu.h>
+#include <net/stp.h>
+
+/* 01:80:c2:00:00:20 - 01:80:c2:00:00:2F */
+#define GARP_ADDR_MIN 0x20
+#define GARP_ADDR_MAX 0x2F
+#define GARP_ADDR_RANGE (GARP_ADDR_MAX - GARP_ADDR_MIN)
+
+static const struct stp_proto *garp_protos[GARP_ADDR_RANGE + 1] __read_mostly;
+static const struct stp_proto *stp_proto __read_mostly;
+
+static struct llc_sap *sap __read_mostly;
+static unsigned int sap_registered;
+static DEFINE_MUTEX(stp_proto_mutex);
+
+/*
+ * LLC receive hook for the bridge spanning tree SAP.
+ * Called under rcu_read_lock from LLC.
+ *
+ * Frames whose destination address ends in the GARP range are handed to the
+ * protocol registered for that exact group address; everything else goes to
+ * the default STP protocol. Frames nobody claims are freed. Always returns 0.
+ */
+static int stp_pdu_rcv(struct sk_buff *skb, struct net_device *dev,
+		       struct packet_type *pt, struct net_device *orig_dev)
+{
+	const struct llc_pdu_un *pdu = llc_pdu_un_hdr(skb);
+	const struct ethhdr *eh = eth_hdr(skb);
+	const unsigned char last_octet = eh->h_dest[5];
+	const struct stp_proto *proto;
+
+	/* Only unnumbered PDUs between the BSPAN SAPs are of interest. */
+	if (pdu->ssap != LLC_SAP_BSPAN ||
+	    pdu->dsap != LLC_SAP_BSPAN ||
+	    pdu->ctrl_1 != LLC_PDU_TYPE_U)
+		goto drop;
+
+	if (last_octet < GARP_ADDR_MIN || last_octet > GARP_ADDR_MAX) {
+		proto = rcu_dereference(stp_proto);
+	} else {
+		proto = rcu_dereference(garp_protos[last_octet -
+						    GARP_ADDR_MIN]);
+		/* The slot is selected by the last octet only, so the full
+		 * address still has to match the registered one. */
+		if (proto &&
+		    compare_ether_addr(eh->h_dest, proto->group_address))
+			goto drop;
+	}
+
+	if (!proto)
+		goto drop;
+
+	proto->rcv(proto, skb, dev);
+	return 0;
+
+drop:
+	kfree_skb(skb);
+	return 0;
+}
+
+/*
+ * stp_proto_register - register a receive handler for STP/GARP PDUs
+ * @proto: protocol to register
+ *
+ * The LLC SAP is opened when the first protocol registers. A protocol with an
+ * all-zero group address becomes the default STP handler; any other protocol
+ * is stored in the GARP table slot selected by the last octet of its group
+ * address.
+ *
+ * Returns 0 on success or -ENOMEM if the LLC SAP could not be opened.
+ */
+int stp_proto_register(const struct stp_proto *proto)
+{
+	int err = 0;
+
+	mutex_lock(&stp_proto_mutex);
+	if (sap_registered == 0) {
+		sap = llc_sap_open(LLC_SAP_BSPAN, stp_pdu_rcv);
+		if (!sap) {
+			/* Do not count a user for a SAP that was never
+			 * opened, otherwise later registrations would skip
+			 * llc_sap_open() and run without a SAP. */
+			err = -ENOMEM;
+			goto out;
+		}
+	}
+	/* Only account for the user once the SAP is known to exist. */
+	sap_registered++;
+
+	if (is_zero_ether_addr(proto->group_address))
+		rcu_assign_pointer(stp_proto, proto);
+	else
+		rcu_assign_pointer(garp_protos[proto->group_address[5] -
+					       GARP_ADDR_MIN], proto);
+out:
+	mutex_unlock(&stp_proto_mutex);
+	return err;
+}
+EXPORT_SYMBOL_GPL(stp_proto_register);
+
+/*
+ * stp_proto_unregister - remove a previously registered STP/GARP protocol
+ * @proto: protocol to remove
+ *
+ * Clears the slot the protocol occupies, waits for an RCU grace period and
+ * drops the LLC SAP once the last registered protocol is gone.
+ */
+void stp_proto_unregister(const struct stp_proto *proto)
+{
+	mutex_lock(&stp_proto_mutex);
+
+	if (!is_zero_ether_addr(proto->group_address))
+		rcu_assign_pointer(garp_protos[proto->group_address[5] -
+					       GARP_ADDR_MIN], NULL);
+	else
+		rcu_assign_pointer(stp_proto, NULL);
+	synchronize_rcu();
+
+	sap_registered--;
+	if (!sap_registered)
+		llc_sap_put(sap);
+
+	mutex_unlock(&stp_proto_mutex);
+}
+EXPORT_SYMBOL_GPL(stp_proto_unregister);
+
+MODULE_LICENSE("GPL");
diff --git a/net/8021q/Kconfig b/net/8021q/Kconfig
index c4a382e450e2..fa073a54963e 100644
--- a/net/8021q/Kconfig
+++ b/net/8021q/Kconfig
@@ -17,3 +17,13 @@ config VLAN_8021Q
will be called 8021q.
If unsure, say N.
+
+config VLAN_8021Q_GVRP
+ bool "GVRP (GARP VLAN Registration Protocol) support"
+ depends on VLAN_8021Q
+ select GARP
+ help
+ Select this to enable GVRP end-system support. GVRP is used for
+ automatic propagation of registered VLANs to switches.
+
+ If unsure, say N.
diff --git a/net/8021q/Makefile b/net/8021q/Makefile
index 10ca7f486c3a..9f4f174ead1c 100644
--- a/net/8021q/Makefile
+++ b/net/8021q/Makefile
@@ -1,12 +1,10 @@
#
# Makefile for the Linux VLAN layer.
#
+obj-$(subst m,y,$(CONFIG_VLAN_8021Q)) += vlan_core.o
+obj-$(CONFIG_VLAN_8021Q) += 8021q.o
-obj-$(CONFIG_VLAN_8021Q) += 8021q.o
-
-8021q-objs := vlan.o vlan_dev.o vlan_netlink.o
-
-ifeq ($(CONFIG_PROC_FS),y)
-8021q-objs += vlanproc.o
-endif
+8021q-y := vlan.o vlan_dev.o vlan_netlink.o
+8021q-$(CONFIG_VLAN_8021Q_GVRP) += vlan_gvrp.o
+8021q-$(CONFIG_PROC_FS) += vlanproc.o
diff --git a/net/8021q/vlan.c b/net/8021q/vlan.c
index ab2225da0ee2..b661f47bf10a 100644
--- a/net/8021q/vlan.c
+++ b/net/8021q/vlan.c
@@ -18,21 +18,20 @@
* 2 of the License, or (at your option) any later version.
*/
-#include <asm/uaccess.h> /* for copy_from_user */
#include <linux/capability.h>
#include <linux/module.h>
#include <linux/netdevice.h>
#include <linux/skbuff.h>
-#include <net/datalink.h>
-#include <linux/mm.h>
-#include <linux/in.h>
#include <linux/init.h>
+#include <linux/rculist.h>
#include <net/p8022.h>
#include <net/arp.h>
#include <linux/rtnetlink.h>
#include <linux/notifier.h>
+#include <net/rtnetlink.h>
#include <net/net_namespace.h>
#include <net/netns/generic.h>
+#include <asm/uaccess.h>
#include <linux/if_vlan.h>
#include "vlan.h"
@@ -83,13 +82,12 @@ static struct vlan_group *__vlan_find_group(struct net_device *real_dev)
*
* Must be invoked with RCU read lock (no preempt)
*/
-struct net_device *__find_vlan_dev(struct net_device *real_dev,
- unsigned short VID)
+struct net_device *__find_vlan_dev(struct net_device *real_dev, u16 vlan_id)
{
struct vlan_group *grp = __vlan_find_group(real_dev);
if (grp)
- return vlan_group_get_device(grp, VID);
+ return vlan_group_get_device(grp, vlan_id);
return NULL;
}
@@ -117,14 +115,14 @@ static struct vlan_group *vlan_group_alloc(struct net_device *real_dev)
return grp;
}
-static int vlan_group_prealloc_vid(struct vlan_group *vg, int vid)
+static int vlan_group_prealloc_vid(struct vlan_group *vg, u16 vlan_id)
{
struct net_device **array;
unsigned int size;
ASSERT_RTNL();
- array = vg->vlan_devices_arrays[vid / VLAN_GROUP_ARRAY_PART_LEN];
+ array = vg->vlan_devices_arrays[vlan_id / VLAN_GROUP_ARRAY_PART_LEN];
if (array != NULL)
return 0;
@@ -133,7 +131,7 @@ static int vlan_group_prealloc_vid(struct vlan_group *vg, int vid)
if (array == NULL)
return -ENOBUFS;
- vg->vlan_devices_arrays[vid / VLAN_GROUP_ARRAY_PART_LEN] = array;
+ vg->vlan_devices_arrays[vlan_id / VLAN_GROUP_ARRAY_PART_LEN] = array;
return 0;
}
@@ -147,7 +145,7 @@ void unregister_vlan_dev(struct net_device *dev)
struct vlan_dev_info *vlan = vlan_dev_info(dev);
struct net_device *real_dev = vlan->real_dev;
struct vlan_group *grp;
- unsigned short vlan_id = vlan->vlan_id;
+ u16 vlan_id = vlan->vlan_id;
ASSERT_RTNL();
@@ -165,8 +163,12 @@ void unregister_vlan_dev(struct net_device *dev)
synchronize_net();
+ unregister_netdevice(dev);
+
/* If the group is now empty, kill off the group. */
if (grp->nr_vlans == 0) {
+ vlan_gvrp_uninit_applicant(real_dev);
+
if (real_dev->features & NETIF_F_HW_VLAN_RX)
real_dev->vlan_rx_register(real_dev, NULL);
@@ -178,8 +180,6 @@ void unregister_vlan_dev(struct net_device *dev)
/* Get rid of the vlan's reference to real_dev */
dev_put(real_dev);
-
- unregister_netdevice(dev);
}
static void vlan_transfer_operstate(const struct net_device *dev,
@@ -203,7 +203,7 @@ static void vlan_transfer_operstate(const struct net_device *dev,
}
}
-int vlan_check_real_dev(struct net_device *real_dev, unsigned short vlan_id)
+int vlan_check_real_dev(struct net_device *real_dev, u16 vlan_id)
{
char *name = real_dev->name;
@@ -240,7 +240,7 @@ int register_vlan_dev(struct net_device *dev)
{
struct vlan_dev_info *vlan = vlan_dev_info(dev);
struct net_device *real_dev = vlan->real_dev;
- unsigned short vlan_id = vlan->vlan_id;
+ u16 vlan_id = vlan->vlan_id;
struct vlan_group *grp, *ngrp = NULL;
int err;
@@ -249,15 +249,18 @@ int register_vlan_dev(struct net_device *dev)
ngrp = grp = vlan_group_alloc(real_dev);
if (!grp)
return -ENOBUFS;
+ err = vlan_gvrp_init_applicant(real_dev);
+ if (err < 0)
+ goto out_free_group;
}
err = vlan_group_prealloc_vid(grp, vlan_id);
if (err < 0)
- goto out_free_group;
+ goto out_uninit_applicant;
err = register_netdevice(dev);
if (err < 0)
- goto out_free_group;
+ goto out_uninit_applicant;
/* Account for reference in struct vlan_dev_info */
dev_hold(real_dev);
@@ -278,6 +281,9 @@ int register_vlan_dev(struct net_device *dev)
return 0;
+out_uninit_applicant:
+ if (ngrp)
+ vlan_gvrp_uninit_applicant(real_dev);
out_free_group:
if (ngrp)
vlan_group_free(ngrp);
@@ -287,8 +293,7 @@ out_free_group:
/* Attach a VLAN device to a mac address (ie Ethernet Card).
* Returns 0 if the device was created or a negative error code otherwise.
*/
-static int register_vlan_device(struct net_device *real_dev,
- unsigned short VLAN_ID)
+static int register_vlan_device(struct net_device *real_dev, u16 vlan_id)
{
struct net_device *new_dev;
struct net *net = dev_net(real_dev);
@@ -296,10 +301,10 @@ static int register_vlan_device(struct net_device *real_dev,
char name[IFNAMSIZ];
int err;
- if (VLAN_ID >= VLAN_VID_MASK)
+ if (vlan_id >= VLAN_VID_MASK)
return -ERANGE;
- err = vlan_check_real_dev(real_dev, VLAN_ID);
+ err = vlan_check_real_dev(real_dev, vlan_id);
if (err < 0)
return err;
@@ -307,26 +312,26 @@ static int register_vlan_device(struct net_device *real_dev,
switch (vn->name_type) {
case VLAN_NAME_TYPE_RAW_PLUS_VID:
/* name will look like: eth1.0005 */
- snprintf(name, IFNAMSIZ, "%s.%.4i", real_dev->name, VLAN_ID);
+ snprintf(name, IFNAMSIZ, "%s.%.4i", real_dev->name, vlan_id);
break;
case VLAN_NAME_TYPE_PLUS_VID_NO_PAD:
/* Put our vlan.VID in the name.
* Name will look like: vlan5
*/
- snprintf(name, IFNAMSIZ, "vlan%i", VLAN_ID);
+ snprintf(name, IFNAMSIZ, "vlan%i", vlan_id);
break;
case VLAN_NAME_TYPE_RAW_PLUS_VID_NO_PAD:
/* Put our vlan.VID in the name.
* Name will look like: eth0.5
*/
- snprintf(name, IFNAMSIZ, "%s.%i", real_dev->name, VLAN_ID);
+ snprintf(name, IFNAMSIZ, "%s.%i", real_dev->name, vlan_id);
break;
case VLAN_NAME_TYPE_PLUS_VID:
/* Put our vlan.VID in the name.
* Name will look like: vlan0005
*/
default:
- snprintf(name, IFNAMSIZ, "vlan%.4i", VLAN_ID);
+ snprintf(name, IFNAMSIZ, "vlan%.4i", vlan_id);
}
new_dev = alloc_netdev(sizeof(struct vlan_dev_info), name,
@@ -341,7 +346,7 @@ static int register_vlan_device(struct net_device *real_dev,
*/
new_dev->mtu = real_dev->mtu;
- vlan_dev_info(new_dev)->vlan_id = VLAN_ID; /* 1 through VLAN_VID_MASK */
+ vlan_dev_info(new_dev)->vlan_id = vlan_id;
vlan_dev_info(new_dev)->real_dev = real_dev;
vlan_dev_info(new_dev)->dent = NULL;
vlan_dev_info(new_dev)->flags = VLAN_FLAG_REORDER_HDR;
@@ -535,7 +540,6 @@ static struct notifier_block vlan_notifier_block __read_mostly = {
static int vlan_ioctl_handler(struct net *net, void __user *arg)
{
int err;
- unsigned short vid = 0;
struct vlan_ioctl_args args;
struct net_device *dev = NULL;
@@ -562,8 +566,7 @@ static int vlan_ioctl_handler(struct net *net, void __user *arg)
goto out;
err = -EINVAL;
- if (args.cmd != ADD_VLAN_CMD &&
- !(dev->priv_flags & IFF_802_1Q_VLAN))
+ if (args.cmd != ADD_VLAN_CMD && !is_vlan_dev(dev))
goto out;
}
@@ -591,9 +594,9 @@ static int vlan_ioctl_handler(struct net *net, void __user *arg)
err = -EPERM;
if (!capable(CAP_NET_ADMIN))
break;
- err = vlan_dev_set_vlan_flag(dev,
- args.u.flag,
- args.vlan_qos);
+ err = vlan_dev_change_flags(dev,
+ args.vlan_qos ? args.u.flag : 0,
+ args.u.flag);
break;
case SET_VLAN_NAME_TYPE_CMD:
@@ -637,8 +640,7 @@ static int vlan_ioctl_handler(struct net *net, void __user *arg)
case GET_VLAN_VID_CMD:
err = 0;
- vlan_dev_get_vid(dev, &vid);
- args.u.VID = vid;
+ args.u.VID = vlan_dev_vlan_id(dev);
if (copy_to_user(arg, &args,
sizeof(struct vlan_ioctl_args)))
err = -EFAULT;
@@ -713,14 +715,20 @@ static int __init vlan_proto_init(void)
if (err < 0)
goto err2;
- err = vlan_netlink_init();
+ err = vlan_gvrp_init();
if (err < 0)
goto err3;
+ err = vlan_netlink_init();
+ if (err < 0)
+ goto err4;
+
dev_add_pack(&vlan_packet_type);
vlan_ioctl_set(vlan_ioctl_handler);
return 0;
+err4:
+ vlan_gvrp_uninit();
err3:
unregister_netdevice_notifier(&vlan_notifier_block);
err2:
@@ -745,8 +753,9 @@ static void __exit vlan_cleanup_module(void)
BUG_ON(!hlist_empty(&vlan_group_hash[i]));
unregister_pernet_gen_device(vlan_net_id, &vlan_net_ops);
-
synchronize_net();
+
+ vlan_gvrp_uninit();
}
module_init(vlan_proto_init);
diff --git a/net/8021q/vlan.h b/net/8021q/vlan.h
index 5229a72c7ea1..a6603a4d917f 100644
--- a/net/8021q/vlan.h
+++ b/net/8021q/vlan.h
@@ -3,6 +3,55 @@
#include <linux/if_vlan.h>
+
+/**
+ * struct vlan_priority_tci_mapping - vlan egress priority mappings
+ * @priority: skb priority
+ * @vlan_qos: vlan priority: (skb->priority << 13) & 0xE000
+ * @next: pointer to next struct
+ *
+ * Entries are singly linked through @next; the list heads live in
+ * vlan_dev_info::egress_priority_map.
+ */
+struct vlan_priority_tci_mapping {
+ u32 priority;
+ u16 vlan_qos;
+ struct vlan_priority_tci_mapping *next;
+};
+
+/**
+ * struct vlan_dev_info - VLAN private device data
+ * @nr_ingress_mappings: number of ingress priority mappings
+ * @ingress_priority_map: ingress priority mappings, indexed by the 3-bit
+ *	priority field of the VLAN TCI (see vlan_get_ingress_priority())
+ * @nr_egress_mappings: number of egress priority mappings
+ * @egress_priority_map: hash of egress priority mappings
+ * @vlan_id: VLAN identifier
+ * @flags: device flags
+ * @real_dev: underlying netdevice
+ * @real_dev_addr: address of underlying netdevice
+ * @dent: proc dir entry
+ * @cnt_inc_headroom_on_tx: statistic - number of skb expansions on TX
+ * @cnt_encap_on_xmit: statistic - number of skb encapsulations on TX
+ */
+struct vlan_dev_info {
+ unsigned int nr_ingress_mappings;
+ u32 ingress_priority_map[8];
+ unsigned int nr_egress_mappings;
+ struct vlan_priority_tci_mapping *egress_priority_map[16];
+
+ u16 vlan_id;
+ u16 flags;
+
+ struct net_device *real_dev;
+ unsigned char real_dev_addr[ETH_ALEN];
+
+ struct proc_dir_entry *dent;
+ unsigned long cnt_inc_headroom_on_tx;
+ unsigned long cnt_encap_on_xmit;
+};
+
+/* Return the VLAN private data kept in the private area of @dev. */
+static inline struct vlan_dev_info *vlan_dev_info(const struct net_device *dev)
+{
+	struct vlan_dev_info *info = netdev_priv(dev);
+
+	return info;
+}
+
#define VLAN_GRP_HASH_SHIFT 5
#define VLAN_GRP_HASH_SIZE (1 << VLAN_GRP_HASH_SHIFT)
#define VLAN_GRP_HASH_MASK (VLAN_GRP_HASH_SIZE - 1)
@@ -18,26 +67,47 @@
* Must be invoked with rcu_read_lock (ie preempt disabled)
* or with RTNL.
*/
-struct net_device *__find_vlan_dev(struct net_device *real_dev,
- unsigned short VID); /* vlan.c */
+struct net_device *__find_vlan_dev(struct net_device *real_dev, u16 vlan_id);
/* found in vlan_dev.c */
int vlan_skb_recv(struct sk_buff *skb, struct net_device *dev,
struct packet_type *ptype, struct net_device *orig_dev);
void vlan_dev_set_ingress_priority(const struct net_device *dev,
- u32 skb_prio, short vlan_prio);
+ u32 skb_prio, u16 vlan_prio);
int vlan_dev_set_egress_priority(const struct net_device *dev,
- u32 skb_prio, short vlan_prio);
-int vlan_dev_set_vlan_flag(const struct net_device *dev,
- u32 flag, short flag_val);
+ u32 skb_prio, u16 vlan_prio);
+int vlan_dev_change_flags(const struct net_device *dev, u32 flag, u32 mask);
void vlan_dev_get_realdev_name(const struct net_device *dev, char *result);
-void vlan_dev_get_vid(const struct net_device *dev, unsigned short *result);
-int vlan_check_real_dev(struct net_device *real_dev, unsigned short vlan_id);
+int vlan_check_real_dev(struct net_device *real_dev, u16 vlan_id);
void vlan_setup(struct net_device *dev);
int register_vlan_dev(struct net_device *dev);
void unregister_vlan_dev(struct net_device *dev);
+/*
+ * Map the priority bits of @vlan_tci (bits 15-13) to the skb priority
+ * configured for them on VLAN device @dev.
+ */
+static inline u32 vlan_get_ingress_priority(struct net_device *dev,
+					    u16 vlan_tci)
+{
+	const unsigned int prio = (vlan_tci >> 13) & 0x7;
+
+	return vlan_dev_info(dev)->ingress_priority_map[prio];
+}
+
+/*
+ * GVRP hooks. With CONFIG_VLAN_8021Q_GVRP the functions are provided
+ * externally; without it they collapse to inline no-ops, and the ones
+ * returning int report success (0), so callers need no #ifdefs.
+ */
+#ifdef CONFIG_VLAN_8021Q_GVRP
+extern int vlan_gvrp_request_join(const struct net_device *dev);
+extern void vlan_gvrp_request_leave(const struct net_device *dev);
+extern int vlan_gvrp_init_applicant(struct net_device *dev);
+extern void vlan_gvrp_uninit_applicant(struct net_device *dev);
+extern int vlan_gvrp_init(void);
+extern void vlan_gvrp_uninit(void);
+#else
+static inline int vlan_gvrp_request_join(const struct net_device *dev) { return 0; }
+static inline void vlan_gvrp_request_leave(const struct net_device *dev) {}
+static inline int vlan_gvrp_init_applicant(struct net_device *dev) { return 0; }
+static inline void vlan_gvrp_uninit_applicant(struct net_device *dev) {}
+static inline int vlan_gvrp_init(void) { return 0; }
+static inline void vlan_gvrp_uninit(void) {}
+#endif
+
int vlan_netlink_init(void);
void vlan_netlink_fini(void);
diff --git a/net/8021q/vlan_core.c b/net/8021q/vlan_core.c
new file mode 100644
index 000000000000..916061f681b6
--- /dev/null
+++ b/net/8021q/vlan_core.c
@@ -0,0 +1,64 @@
+#include <linux/skbuff.h>
+#include <linux/netdevice.h>
+#include <linux/if_vlan.h>
+#include "vlan.h"
+
+/*
+ * VLAN rx hw acceleration helper. This acts like netif_{rx,receive_skb}().
+ *
+ * @skb is retargeted to the VLAN device found in @grp for the VLAN ID in
+ * @vlan_tci, accounted in that device's stats and then passed on via
+ * netif_receive_skb() when @polling is non-zero, netif_rx() otherwise.
+ */
+int __vlan_hwaccel_rx(struct sk_buff *skb, struct vlan_group *grp,
+		      u16 vlan_tci, int polling)
+{
+	struct net_device_stats *rx_stats;
+	struct net_device *vlan_dev;
+
+	if (skb_bond_should_drop(skb)) {
+		dev_kfree_skb_any(skb);
+		return NET_RX_DROP;
+	}
+
+	/* The tag is set while netif_nit_deliver() runs and cleared again
+	 * below once the VLAN device has been resolved. */
+	skb->vlan_tci = vlan_tci;
+	netif_nit_deliver(skb);
+
+	vlan_dev = vlan_group_get_device(grp, vlan_tci & VLAN_VID_MASK);
+	skb->dev = vlan_dev;
+	if (!vlan_dev) {
+		dev_kfree_skb_any(skb);
+		/* Not NET_RX_DROP, this is not being dropped
+		 * due to congestion. */
+		return NET_RX_SUCCESS;
+	}
+	vlan_dev->last_rx = jiffies;
+	skb->vlan_tci = 0;
+
+	rx_stats = &vlan_dev->stats;
+	rx_stats->rx_packets++;
+	rx_stats->rx_bytes += skb->len;
+
+	skb->priority = vlan_get_ingress_priority(vlan_dev, vlan_tci);
+
+	if (skb->pkt_type == PACKET_MULTICAST) {
+		rx_stats->multicast++;
+	} else if (skb->pkt_type == PACKET_OTHERHOST) {
+		/* Our lower layer thinks this is not local, let's make sure.
+		 * This allows the VLAN to have a different MAC than the
+		 * underlying device, and still route correctly. */
+		if (!compare_ether_addr(eth_hdr(skb)->h_dest,
+					vlan_dev->dev_addr))
+			skb->pkt_type = PACKET_HOST;
+	}
+
+	if (polling)
+		return netif_receive_skb(skb);
+	return netif_rx(skb);
+}
+EXPORT_SYMBOL(__vlan_hwaccel_rx);
+
+/* Return the underlying device recorded in the private data of VLAN device @dev. */
+struct net_device *vlan_dev_real_dev(const struct net_device *dev)
+{
+	const struct vlan_dev_info *vlan = vlan_dev_info(dev);
+
+	return vlan->real_dev;
+}
+EXPORT_SYMBOL_GPL(vlan_dev_real_dev);
+
+/* Return the VLAN identifier recorded in the private data of VLAN device @dev. */
+u16 vlan_dev_vlan_id(const struct net_device *dev)
+{
+	const struct vlan_dev_info *vlan = vlan_dev_info(dev);
+
+	return vlan->vlan_id;
+}
+EXPORT_SYMBOL_GPL(vlan_dev_vlan_id);
diff --git a/net/8021q/vlan_dev.c b/net/8021q/vlan_dev.c
index 5d055c242ed8..f42bc2b26b85 100644
--- a/net/8021q/vlan_dev.c
+++ b/net/8021q/vlan_dev.c
@@ -21,21 +21,15 @@
*/
#include <linux/module.h>
-#include <linux/mm.h>
-#include <linux/in.h>
-#include <linux/init.h>
-#include <asm/uaccess.h> /* for copy_from_user */
#include <linux/skbuff.h>
#include <linux/netdevice.h>
#include <linux/etherdevice.h>
-#include <net/datalink.h>
-#include <net/p8022.h>
+#include <linux/ethtool.h>
#include <net/arp.h>
#include "vlan.h"
#include "vlanproc.h"
#include <linux/if_vlan.h>
-#include <net/ip.h>
/*
* Rebuild the Ethernet MAC header. This is called after an ARP
@@ -73,11 +67,8 @@ static int vlan_dev_rebuild_header(struct sk_buff *skb)
static inline struct sk_buff *vlan_check_reorder_header(struct sk_buff *skb)
{
if (vlan_dev_info(skb->dev)->flags & VLAN_FLAG_REORDER_HDR) {
- if (skb_shared(skb) || skb_cloned(skb)) {
- struct sk_buff *nskb = skb_copy(skb, GFP_ATOMIC);
- kfree_skb(skb);
- skb = nskb;
- }
+ if (skb_cow(skb, skb_headroom(skb)) < 0)
+ skb = NULL;
if (skb) {
/* Lifted from Gleb's VLAN code... */
memmove(skb->data - ETH_HLEN,
@@ -149,9 +140,9 @@ int vlan_skb_recv(struct sk_buff *skb, struct net_device *dev,
struct packet_type *ptype, struct net_device *orig_dev)
{
struct vlan_hdr *vhdr;
- unsigned short vid;
struct net_device_stats *stats;
- unsigned short vlan_TCI;
+ u16 vlan_id;
+ u16 vlan_tci;
skb = skb_share_check(skb, GFP_ATOMIC);
if (skb == NULL)
@@ -161,14 +152,14 @@ int vlan_skb_recv(struct sk_buff *skb, struct net_device *dev,
goto err_free;
vhdr = (struct vlan_hdr *)skb->data;
- vlan_TCI = ntohs(vhdr->h_vlan_TCI);
- vid = (vlan_TCI & VLAN_VID_MASK);
+ vlan_tci = ntohs(vhdr->h_vlan_TCI);
+ vlan_id = vlan_tci & VLAN_VID_MASK;
rcu_read_lock();
- skb->dev = __find_vlan_dev(dev, vid);
+ skb->dev = __find_vlan_dev(dev, vlan_id);
if (!skb->dev) {
pr_debug("%s: ERROR: No net_device for VID: %u on dev: %s\n",
- __func__, (unsigned int)vid, dev->name);
+ __func__, vlan_id, dev->name);
goto err_unlock;
}
@@ -180,11 +171,10 @@ int vlan_skb_recv(struct sk_buff *skb, struct net_device *dev,
skb_pull_rcsum(skb, VLAN_HLEN);
- skb->priority = vlan_get_ingress_priority(skb->dev,
- ntohs(vhdr->h_vlan_TCI));
+ skb->priority = vlan_get_ingress_priority(skb->dev, vlan_tci);
pr_debug("%s: priority: %u for TCI: %hu\n",
- __func__, skb->priority, ntohs(vhdr->h_vlan_TCI));
+ __func__, skb->priority, vlan_tci);
switch (skb->pkt_type) {
case PACKET_BROADCAST: /* Yeah, stats collect these together.. */
@@ -227,7 +217,7 @@ err_free:
return NET_RX_DROP;
}
-static inline unsigned short
+static inline u16
vlan_dev_get_egress_qos_mask(struct net_device *dev, struct sk_buff *skb)
{
struct vlan_priority_tci_mapping *mp;
@@ -259,103 +249,44 @@ static int vlan_dev_hard_header(struct sk_buff *skb, struct net_device *dev,
unsigned int len)
{
struct vlan_hdr *vhdr;
- unsigned short veth_TCI = 0;
- int rc = 0;
- int build_vlan_header = 0;
- struct net_device *vdev = dev;
-
- pr_debug("%s: skb: %p type: %hx len: %u vlan_id: %hx, daddr: %p\n",
- __func__, skb, type, len, vlan_dev_info(dev)->vlan_id,
- daddr);
-
- /* build vlan header only if re_order_header flag is NOT set. This
- * fixes some programs that get confused when they see a VLAN device
- * sending a frame that is VLAN encoded (the consensus is that the VLAN
- * device should look completely like an Ethernet device when the
- * REORDER_HEADER flag is set) The drawback to this is some extra
- * header shuffling in the hard_start_xmit. Users can turn off this
- * REORDER behaviour with the vconfig tool.
- */
- if (!(vlan_dev_info(dev)->flags & VLAN_FLAG_REORDER_HDR))
- build_vlan_header = 1;
+ unsigned int vhdrlen = 0;
+ u16 vlan_tci = 0;
+ int rc;
- if (build_vlan_header) {
- vhdr = (struct vlan_hdr *) skb_push(skb, VLAN_HLEN);
+ if (WARN_ON(skb_headroom(skb) < dev->hard_header_len))
+ return -ENOSPC;
- /* build the four bytes that make this a VLAN header. */
-
- /* Now, construct the second two bytes. This field looks
- * something like:
- * usr_priority: 3 bits (high bits)
- * CFI 1 bit
- * VLAN ID 12 bits (low bits)
- *
- */
- veth_TCI = vlan_dev_info(dev)->vlan_id;
- veth_TCI |= vlan_dev_get_egress_qos_mask(dev, skb);
+ if (!(vlan_dev_info(dev)->flags & VLAN_FLAG_REORDER_HDR)) {
+ vhdr = (struct vlan_hdr *) skb_push(skb, VLAN_HLEN);
- vhdr->h_vlan_TCI = htons(veth_TCI);
+ vlan_tci = vlan_dev_info(dev)->vlan_id;
+ vlan_tci |= vlan_dev_get_egress_qos_mask(dev, skb);
+ vhdr->h_vlan_TCI = htons(vlan_tci);
/*
* Set the protocol type. For a packet of type ETH_P_802_3 we
* put the length in here instead. It is up to the 802.2
* layer to carry protocol information.
*/
-
if (type != ETH_P_802_3)
vhdr->h_vlan_encapsulated_proto = htons(type);
else
vhdr->h_vlan_encapsulated_proto = htons(len);
skb->protocol = htons(ETH_P_8021Q);
- skb_reset_network_header(skb);
+ type = ETH_P_8021Q;
+ vhdrlen = VLAN_HLEN;
}
/* Before delegating work to the lower layer, enter our MAC-address */
if (saddr == NULL)
saddr = dev->dev_addr;
+ /* Now make the underlying real hard header */
dev = vlan_dev_info(dev)->real_dev;
-
- /* MPLS can send us skbuffs w/out enough space. This check will grow
- * the skb if it doesn't have enough headroom. Not a beautiful solution,
- * so I'll tick a counter so that users can know it's happening...
- * If they care...
- */
-
- /* NOTE: This may still break if the underlying device is not the final
- * device (and thus there are more headers to add...) It should work for
- * good-ole-ethernet though.
- */
- if (skb_headroom(skb) < dev->hard_header_len) {
- struct sk_buff *sk_tmp = skb;
- skb = skb_realloc_headroom(sk_tmp, dev->hard_header_len);
- kfree_skb(sk_tmp);
- if (skb == NULL) {
- struct net_device_stats *stats = &vdev->stats;
- stats->tx_dropped++;
- return -ENOMEM;
- }
- vlan_dev_info(vdev)->cnt_inc_headroom_on_tx++;
- pr_debug("%s: %s: had to grow skb\n", __func__, vdev->name);
- }
-
- if (build_vlan_header) {
- /* Now make the underlying real hard header */
- rc = dev_hard_header(skb, dev, ETH_P_8021Q, daddr, saddr,
- len + VLAN_HLEN);
- if (rc > 0)
- rc += VLAN_HLEN;
- else if (rc < 0)
- rc -= VLAN_HLEN;
- } else
- /* If here, then we'll just make a normal looking ethernet
- * frame, but, the hard_start_xmit method will insert the tag
- * (it has to be able to do this for bridged and other skbs
- * that don't come down the protocol stack in an orderly manner.
- */
- rc = dev_hard_header(skb, dev, type, daddr, saddr, len);
-
+ rc = dev_hard_header(skb, dev, type, daddr, saddr, len + vhdrlen);
+ if (rc > 0)
+ rc += vhdrlen;
return rc;
}
@@ -369,78 +300,49 @@ static int vlan_dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev)
* NOTE: THIS ASSUMES DIX ETHERNET, SPECIFICALLY NOT SUPPORTING
* OTHER THINGS LIKE FDDI/TokenRing/802.3 SNAPs...
*/
-
if (veth->h_vlan_proto != htons(ETH_P_8021Q) ||
- vlan_dev_info(dev)->flags & VLAN_FLAG_REORDER_HDR) {
- int orig_headroom = skb_headroom(skb);
- unsigned short veth_TCI;
+ vlan_dev_info(dev)->flags & VLAN_FLAG_REORDER_HDR) {
+ unsigned int orig_headroom = skb_headroom(skb);
+ u16 vlan_tci;
- /* This is not a VLAN frame...but we can fix that! */
vlan_dev_info(dev)->cnt_encap_on_xmit++;
- pr_debug("%s: proto to encap: 0x%hx\n",
- __func__, ntohs(veth->h_vlan_proto));
- /* Construct the second two bytes. This field looks something
- * like:
- * usr_priority: 3 bits (high bits)
- * CFI 1 bit
- * VLAN ID 12 bits (low bits)
- */
- veth_TCI = vlan_dev_info(dev)->vlan_id;
- veth_TCI |= vlan_dev_get_egress_qos_mask(dev, skb);
-
- skb = __vlan_put_tag(skb, veth_TCI);
+ vlan_tci = vlan_dev_info(dev)->vlan_id;
+ vlan_tci |= vlan_dev_get_egress_qos_mask(dev, skb);
+ skb = __vlan_put_tag(skb, vlan_tci);
if (!skb) {
stats->tx_dropped++;
- return 0;
+ return NETDEV_TX_OK;
}
if (orig_headroom < VLAN_HLEN)
vlan_dev_info(dev)->cnt_inc_headroom_on_tx++;
}
- pr_debug("%s: about to send skb: %p to dev: %s\n",
- __func__, skb, skb->dev->name);
- pr_debug(" " MAC_FMT " " MAC_FMT " %4hx %4hx %4hx\n",
- veth->h_dest[0], veth->h_dest[1], veth->h_dest[2],
- veth->h_dest[3], veth->h_dest[4], veth->h_dest[5],
- veth->h_source[0], veth->h_source[1], veth->h_source[2],
- veth->h_source[3], veth->h_source[4], veth->h_source[5],
- veth->h_vlan_proto, veth->h_vlan_TCI,
- veth->h_vlan_encapsulated_proto);
-
- stats->tx_packets++; /* for statics only */
+ stats->tx_packets++;
stats->tx_bytes += skb->len;
skb->dev = vlan_dev_info(dev)->real_dev;
dev_queue_xmit(skb);
-
- return 0;
+ return NETDEV_TX_OK;
}
static int vlan_dev_hwaccel_hard_start_xmit(struct sk_buff *skb,
struct net_device *dev)
{
struct net_device_stats *stats = &dev->stats;
- unsigned short veth_TCI;
+ u16 vlan_tci;
- /* Construct the second two bytes. This field looks something
- * like:
- * usr_priority: 3 bits (high bits)
- * CFI 1 bit
- * VLAN ID 12 bits (low bits)
- */
- veth_TCI = vlan_dev_info(dev)->vlan_id;
- veth_TCI |= vlan_dev_get_egress_qos_mask(dev, skb);
- skb = __vlan_hwaccel_put_tag(skb, veth_TCI);
+ vlan_tci = vlan_dev_info(dev)->vlan_id;
+ vlan_tci |= vlan_dev_get_egress_qos_mask(dev, skb);
+ skb = __vlan_hwaccel_put_tag(skb, vlan_tci);
stats->tx_packets++;
stats->tx_bytes += skb->len;
skb->dev = vlan_dev_info(dev)->real_dev;
dev_queue_xmit(skb);
-
- return 0;
+ return NETDEV_TX_OK;
}
static int vlan_dev_change_mtu(struct net_device *dev, int new_mtu)
@@ -457,7 +359,7 @@ static int vlan_dev_change_mtu(struct net_device *dev, int new_mtu)
}
void vlan_dev_set_ingress_priority(const struct net_device *dev,
- u32 skb_prio, short vlan_prio)
+ u32 skb_prio, u16 vlan_prio)
{
struct vlan_dev_info *vlan = vlan_dev_info(dev);
@@ -470,7 +372,7 @@ void vlan_dev_set_ingress_priority(const struct net_device *dev,
}
int vlan_dev_set_egress_priority(const struct net_device *dev,
- u32 skb_prio, short vlan_prio)
+ u32 skb_prio, u16 vlan_prio)
{
struct vlan_dev_info *vlan = vlan_dev_info(dev);
struct vlan_priority_tci_mapping *mp = NULL;
@@ -507,18 +409,23 @@ int vlan_dev_set_egress_priority(const struct net_device *dev,
}
/* Flags are defined in the vlan_flags enum in include/linux/if_vlan.h file. */
-int vlan_dev_set_vlan_flag(const struct net_device *dev,
- u32 flag, short flag_val)
+int vlan_dev_change_flags(const struct net_device *dev, u32 flags, u32 mask)
{
- /* verify flag is supported */
- if (flag == VLAN_FLAG_REORDER_HDR) {
- if (flag_val)
- vlan_dev_info(dev)->flags |= VLAN_FLAG_REORDER_HDR;
+ struct vlan_dev_info *vlan = vlan_dev_info(dev);
+ u32 old_flags = vlan->flags;
+
+ if (mask & ~(VLAN_FLAG_REORDER_HDR | VLAN_FLAG_GVRP))
+ return -EINVAL;
+
+ vlan->flags = (old_flags & ~mask) | (flags & mask);
+
+ if (netif_running(dev) && (vlan->flags ^ old_flags) & VLAN_FLAG_GVRP) {
+ if (vlan->flags & VLAN_FLAG_GVRP)
+ vlan_gvrp_request_join(dev);
else
- vlan_dev_info(dev)->flags &= ~VLAN_FLAG_REORDER_HDR;
- return 0;
+ vlan_gvrp_request_leave(dev);
}
- return -EINVAL;
+ return 0;
}
void vlan_dev_get_realdev_name(const struct net_device *dev, char *result)
@@ -526,11 +433,6 @@ void vlan_dev_get_realdev_name(const struct net_device *dev, char *result)
strncpy(result, vlan_dev_info(dev)->real_dev->name, 23);
}
-void vlan_dev_get_vid(const struct net_device *dev, unsigned short *result)
-{
- *result = vlan_dev_info(dev)->vlan_id;
-}
-
static int vlan_dev_open(struct net_device *dev)
{
struct vlan_dev_info *vlan = vlan_dev_info(dev);
@@ -543,21 +445,44 @@ static int vlan_dev_open(struct net_device *dev)
if (compare_ether_addr(dev->dev_addr, real_dev->dev_addr)) {
err = dev_unicast_add(real_dev, dev->dev_addr, ETH_ALEN);
if (err < 0)
- return err;
+ goto out;
+ }
+
+ if (dev->flags & IFF_ALLMULTI) {
+ err = dev_set_allmulti(real_dev, 1);
+ if (err < 0)
+ goto del_unicast;
}
+ if (dev->flags & IFF_PROMISC) {
+ err = dev_set_promiscuity(real_dev, 1);
+ if (err < 0)
+ goto clear_allmulti;
+ }
+
memcpy(vlan->real_dev_addr, real_dev->dev_addr, ETH_ALEN);
- if (dev->flags & IFF_ALLMULTI)
- dev_set_allmulti(real_dev, 1);
- if (dev->flags & IFF_PROMISC)
- dev_set_promiscuity(real_dev, 1);
+ if (vlan->flags & VLAN_FLAG_GVRP)
+ vlan_gvrp_request_join(dev);
return 0;
+
+clear_allmulti:
+ if (dev->flags & IFF_ALLMULTI)
+ dev_set_allmulti(real_dev, -1);
+del_unicast:
+ if (compare_ether_addr(dev->dev_addr, real_dev->dev_addr))
+ dev_unicast_delete(real_dev, dev->dev_addr, ETH_ALEN);
+out:
+ return err;
}
static int vlan_dev_stop(struct net_device *dev)
{
- struct net_device *real_dev = vlan_dev_info(dev)->real_dev;
+ struct vlan_dev_info *vlan = vlan_dev_info(dev);
+ struct net_device *real_dev = vlan->real_dev;
+
+ if (vlan->flags & VLAN_FLAG_GVRP)
+ vlan_gvrp_request_leave(dev);
dev_mc_unsync(real_dev, dev);
dev_unicast_unsync(real_dev, dev);
@@ -645,6 +570,20 @@ static void vlan_dev_set_rx_mode(struct net_device *vlan_dev)
*/
static struct lock_class_key vlan_netdev_xmit_lock_key;
+/*
+ * Per-queue callback: put the xmit lock of @txq into the VLAN lock class,
+ * using the subclass that @_subclass points to (an int).
+ */
+static void vlan_dev_set_lockdep_one(struct net_device *dev,
+				     struct netdev_queue *txq,
+				     void *_subclass)
+{
+	const int *subclass = _subclass;
+
+	lockdep_set_class_and_subclass(&txq->_xmit_lock,
+				       &vlan_netdev_xmit_lock_key,
+				       *subclass);
+}
+
+/* Apply the VLAN xmit lock class with @subclass to every TX queue of @dev. */
+static void vlan_dev_set_lockdep_class(struct net_device *dev, int subclass)
+{
+	void *arg = &subclass;
+
+	netdev_for_each_tx_queue(dev, vlan_dev_set_lockdep_one, arg);
+}
+
static const struct header_ops vlan_header_ops = {
.create = vlan_dev_hard_header,
.rebuild = vlan_dev_rebuild_header,
@@ -683,11 +622,10 @@ static int vlan_dev_init(struct net_device *dev)
dev->hard_start_xmit = vlan_dev_hard_start_xmit;
}
- if (real_dev->priv_flags & IFF_802_1Q_VLAN)
+ if (is_vlan_dev(real_dev))
subclass = 1;
- lockdep_set_class_and_subclass(&dev->_xmit_lock,
- &vlan_netdev_xmit_lock_key, subclass);
+ vlan_dev_set_lockdep_class(dev, subclass);
return 0;
}
@@ -705,6 +643,35 @@ static void vlan_dev_uninit(struct net_device *dev)
}
}
+/*
+ * ethtool get_rx_csum for VLAN devices: forward the query to the underlying
+ * device, or report 0 when it has no get_rx_csum operation.
+ */
+static u32 vlan_ethtool_get_rx_csum(struct net_device *dev)
+{
+	struct net_device *real_dev = vlan_dev_info(dev)->real_dev;
+	const struct ethtool_ops *ops = real_dev->ethtool_ops;
+
+	if (ops && ops->get_rx_csum)
+		return ops->get_rx_csum(real_dev);
+
+	return 0;
+}
+
+/*
+ * ethtool get_flags for VLAN devices: forward the query to the underlying
+ * device, but only when that device has NETIF_F_HW_VLAN_RX set and provides
+ * a get_flags operation; otherwise report 0.
+ */
+static u32 vlan_ethtool_get_flags(struct net_device *dev)
+{
+	struct net_device *real_dev = vlan_dev_info(dev)->real_dev;
+	const struct ethtool_ops *ops;
+
+	if (!(real_dev->features & NETIF_F_HW_VLAN_RX))
+		return 0;
+
+	ops = real_dev->ethtool_ops;
+	if (!ops || !ops->get_flags)
+		return 0;
+
+	return ops->get_flags(real_dev);
+}
+
+/*
+ * ethtool operations for VLAN devices: link state uses the generic
+ * ethtool_op_get_link helper, rx checksum and flags are answered by the
+ * vlan_ethtool_* wrappers that query the underlying device.
+ */
+static const struct ethtool_ops vlan_ethtool_ops = {
+ .get_link = ethtool_op_get_link,
+ .get_rx_csum = vlan_ethtool_get_rx_csum,
+ .get_flags = vlan_ethtool_get_flags,
+};
+
void vlan_setup(struct net_device *dev)
{
ether_setup(dev);
@@ -723,6 +690,7 @@ void vlan_setup(struct net_device *dev)
dev->change_rx_flags = vlan_dev_change_rx_flags;
dev->do_ioctl = vlan_dev_ioctl;
dev->destructor = free_netdev;
+ dev->ethtool_ops = &vlan_ethtool_ops;
memset(dev->broadcast, 0, ETH_ALEN);
}
diff --git a/net/8021q/vlan_gvrp.c b/net/8021q/vlan_gvrp.c
new file mode 100644
index 000000000000..061ceceeef12
--- /dev/null
+++ b/net/8021q/vlan_gvrp.c
@@ -0,0 +1,66 @@
+/*
+ * IEEE 802.1Q GARP VLAN Registration Protocol (GVRP)
+ *
+ * Copyright (c) 2008 Patrick McHardy <kaber@trash.net>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * version 2 as published by the Free Software Foundation.
+ */
+#include <linux/types.h>
+#include <linux/if_vlan.h>
+#include <net/garp.h>
+#include "vlan.h"
+
+#define GARP_GVRP_ADDRESS { 0x01, 0x80, 0xc2, 0x00, 0x00, 0x21 }
+
+enum gvrp_attributes {
+ GVRP_ATTR_INVALID, /* 0 */
+ GVRP_ATTR_VID, /* attribute type used by vlan_gvrp_request_join()/_leave() */
+ __GVRP_ATTR_MAX /* sentinel: one past the last attribute */
+};
+#define GVRP_ATTR_MAX (__GVRP_ATTR_MAX - 1) /* evaluates to GVRP_ATTR_VID */
+
+static struct garp_application vlan_gvrp_app __read_mostly = { /* passed to every garp_*() call in this file */
+ .proto.group_address = GARP_GVRP_ADDRESS, /* 01:80:c2:00:00:21 */
+ .maxattr = GVRP_ATTR_MAX,
+ .type = GARP_APPLICATION_GVRP,
+};
+
+int vlan_gvrp_request_join(const struct net_device *dev)
+{ /* issues a GARP join for this VLAN's ID on its real device; returns garp_request_join()'s result */
+ const struct vlan_dev_info *vlan = vlan_dev_info(dev);
+ __be16 vlan_id = htons(vlan->vlan_id); /* attribute value is the VID converted with htons() */
+
+ return garp_request_join(vlan->real_dev, &vlan_gvrp_app,
+ &vlan_id, sizeof(vlan_id), GVRP_ATTR_VID);
+}
+
+void vlan_gvrp_request_leave(const struct net_device *dev)
+{ /* issues a GARP leave for this VLAN's ID on its real device */
+ const struct vlan_dev_info *vlan = vlan_dev_info(dev);
+ __be16 vlan_id = htons(vlan->vlan_id); /* same attribute encoding as vlan_gvrp_request_join() */
+
+ garp_request_leave(vlan->real_dev, &vlan_gvrp_app,
+ &vlan_id, sizeof(vlan_id), GVRP_ATTR_VID);
+}
+
+int vlan_gvrp_init_applicant(struct net_device *dev)
+{ /* thin wrapper: binds vlan_gvrp_app to dev and returns garp_init_applicant()'s result */
+ return garp_init_applicant(dev, &vlan_gvrp_app);
+}
+
+void vlan_gvrp_uninit_applicant(struct net_device *dev)
+{ /* thin wrapper: counterpart of vlan_gvrp_init_applicant() for dev */
+ garp_uninit_applicant(dev, &vlan_gvrp_app);
+}
+
+int __init vlan_gvrp_init(void)
+{ /* registers vlan_gvrp_app with GARP; returns garp_register_application()'s result */
+ return garp_register_application(&vlan_gvrp_app);
+}
+
+void vlan_gvrp_uninit(void)
+{ /* unregisters vlan_gvrp_app; counterpart of vlan_gvrp_init() */
+ garp_unregister_application(&vlan_gvrp_app);
+}
diff --git a/net/8021q/vlan_netlink.c b/net/8021q/vlan_netlink.c
index c93e69ec28ed..e9c91dcecc9b 100644
--- a/net/8021q/vlan_netlink.c
+++ b/net/8021q/vlan_netlink.c
@@ -59,7 +59,8 @@ static int vlan_validate(struct nlattr *tb[], struct nlattr *data[])
}
if (data[IFLA_VLAN_FLAGS]) {
flags = nla_data(data[IFLA_VLAN_FLAGS]);
- if ((flags->flags & flags->mask) & ~VLAN_FLAG_REORDER_HDR)
+ if ((flags->flags & flags->mask) &
+ ~(VLAN_FLAG_REORDER_HDR | VLAN_FLAG_GVRP))
return -EINVAL;
}
@@ -75,7 +76,6 @@ static int vlan_validate(struct nlattr *tb[], struct nlattr *data[])
static int vlan_changelink(struct net_device *dev,
struct nlattr *tb[], struct nlattr *data[])
{
- struct vlan_dev_info *vlan = vlan_dev_info(dev);
struct ifla_vlan_flags *flags;
struct ifla_vlan_qos_mapping *m;
struct nlattr *attr;
@@ -83,8 +83,7 @@ static int vlan_changelink(struct net_device *dev,
if (data[IFLA_VLAN_FLAGS]) {
flags = nla_data(data[IFLA_VLAN_FLAGS]);
- vlan->flags = (vlan->flags & ~flags->mask) |
- (flags->flags & flags->mask);
+ vlan_dev_change_flags(dev, flags->flags, flags->mask);
}
if (data[IFLA_VLAN_INGRESS_QOS]) {
nla_for_each_nested(attr, data[IFLA_VLAN_INGRESS_QOS], rem) {
diff --git a/net/8021q/vlanproc.c b/net/8021q/vlanproc.c
index 08b54b593d56..0feefa4e1a4b 100644
--- a/net/8021q/vlanproc.c
+++ b/net/8021q/vlanproc.c
@@ -18,16 +18,9 @@
*****************************************************************************/
#include <linux/module.h>
-#include <linux/stddef.h> /* offsetof(), etc. */
-#include <linux/errno.h> /* return codes */
+#include <linux/errno.h>
#include <linux/kernel.h>
-#include <linux/slab.h> /* kmalloc(), kfree() */
-#include <linux/mm.h>
-#include <linux/string.h> /* inline mem*, str* functions */
-#include <linux/init.h> /* __initfunc et al. */
-#include <asm/byteorder.h> /* htons(), etc. */
-#include <asm/uaccess.h> /* copy_to_user */
-#include <asm/io.h>
+#include <linux/string.h>
#include <linux/proc_fs.h>
#include <linux/seq_file.h>
#include <linux/fs.h>
@@ -290,7 +283,7 @@ static int vlandev_seq_show(struct seq_file *seq, void *offset)
static const char fmt[] = "%30s %12lu\n";
int i;
- if (!(vlandev->priv_flags & IFF_802_1Q_VLAN))
+ if (!is_vlan_dev(vlandev))
return 0;
seq_printf(seq,
diff --git a/net/Kconfig b/net/Kconfig
index acbf7c60e89b..b98668751749 100644
--- a/net/Kconfig
+++ b/net/Kconfig
@@ -181,6 +181,7 @@ source "net/dccp/Kconfig"
source "net/sctp/Kconfig"
source "net/tipc/Kconfig"
source "net/atm/Kconfig"
+source "net/802/Kconfig"
source "net/bridge/Kconfig"
source "net/8021q/Kconfig"
source "net/decnet/Kconfig"
diff --git a/net/Makefile b/net/Makefile
index b7a13643b549..4f43e7f874f3 100644
--- a/net/Makefile
+++ b/net/Makefile
@@ -42,7 +42,9 @@ obj-$(CONFIG_AF_RXRPC) += rxrpc/
obj-$(CONFIG_ATM) += atm/
obj-$(CONFIG_DECNET) += decnet/
obj-$(CONFIG_ECONET) += econet/
-obj-$(CONFIG_VLAN_8021Q) += 8021q/
+ifneq ($(CONFIG_VLAN_8021Q),)
+obj-y += 8021q/
+endif
obj-$(CONFIG_IP_DCCP) += dccp/
obj-$(CONFIG_IP_SCTP) += sctp/
obj-y += wireless/
diff --git a/net/atm/addr.c b/net/atm/addr.c
index 6afa77d63bb5..82e85abc303d 100644
--- a/net/atm/addr.c
+++ b/net/atm/addr.c
@@ -9,7 +9,7 @@
#include "signaling.h"
#include "addr.h"
-static int check_addr(struct sockaddr_atmsvc *addr)
+static int check_addr(const struct sockaddr_atmsvc *addr)
{
int i;
@@ -23,7 +23,7 @@ static int check_addr(struct sockaddr_atmsvc *addr)
return -EINVAL;
}
-static int identical(struct sockaddr_atmsvc *a, struct sockaddr_atmsvc *b)
+static int identical(const struct sockaddr_atmsvc *a, const struct sockaddr_atmsvc *b)
{
if (*a->sas_addr.prv)
if (memcmp(a->sas_addr.prv, b->sas_addr.prv, ATM_ESA_LEN))
@@ -35,7 +35,7 @@ static int identical(struct sockaddr_atmsvc *a, struct sockaddr_atmsvc *b)
return !strcmp(a->sas_addr.pub, b->sas_addr.pub);
}
-static void notify_sigd(struct atm_dev *dev)
+static void notify_sigd(const struct atm_dev *dev)
{
struct sockaddr_atmpvc pvc;
@@ -63,7 +63,7 @@ void atm_reset_addr(struct atm_dev *dev, enum atm_addr_type_t atype)
notify_sigd(dev);
}
-int atm_add_addr(struct atm_dev *dev, struct sockaddr_atmsvc *addr,
+int atm_add_addr(struct atm_dev *dev, const struct sockaddr_atmsvc *addr,
enum atm_addr_type_t atype)
{
unsigned long flags;
@@ -98,7 +98,7 @@ int atm_add_addr(struct atm_dev *dev, struct sockaddr_atmsvc *addr,
return 0;
}
-int atm_del_addr(struct atm_dev *dev, struct sockaddr_atmsvc *addr,
+int atm_del_addr(struct atm_dev *dev, const struct sockaddr_atmsvc *addr,
enum atm_addr_type_t atype)
{
unsigned long flags;
diff --git a/net/atm/addr.h b/net/atm/addr.h
index f39433ad45da..6837e9e7eb13 100644
--- a/net/atm/addr.h
+++ b/net/atm/addr.h
@@ -10,9 +10,9 @@
#include <linux/atmdev.h>
void atm_reset_addr(struct atm_dev *dev, enum atm_addr_type_t type);
-int atm_add_addr(struct atm_dev *dev, struct sockaddr_atmsvc *addr,
+int atm_add_addr(struct atm_dev *dev, const struct sockaddr_atmsvc *addr,
enum atm_addr_type_t type);
-int atm_del_addr(struct atm_dev *dev, struct sockaddr_atmsvc *addr,
+int atm_del_addr(struct atm_dev *dev, const struct sockaddr_atmsvc *addr,
enum atm_addr_type_t type);
int atm_get_addr(struct atm_dev *dev, struct sockaddr_atmsvc __user *buf,
size_t size, enum atm_addr_type_t type);
diff --git a/net/atm/br2684.c b/net/atm/br2684.c
index 05fafdc2eea3..8d9a6f158880 100644
--- a/net/atm/br2684.c
+++ b/net/atm/br2684.c
@@ -52,12 +52,12 @@ static void skb_debug(const struct sk_buff *skb)
#define ETHERTYPE_IPV6 0x86, 0xdd
#define PAD_BRIDGED 0x00, 0x00
-static unsigned char ethertype_ipv4[] = { ETHERTYPE_IPV4 };
-static unsigned char ethertype_ipv6[] = { ETHERTYPE_IPV6 };
-static unsigned char llc_oui_pid_pad[] =
+static const unsigned char ethertype_ipv4[] = { ETHERTYPE_IPV4 };
+static const unsigned char ethertype_ipv6[] = { ETHERTYPE_IPV6 };
+static const unsigned char llc_oui_pid_pad[] =
{ LLC, SNAP_BRIDGED, PID_ETHERNET, PAD_BRIDGED };
-static unsigned char llc_oui_ipv4[] = { LLC, SNAP_ROUTED, ETHERTYPE_IPV4 };
-static unsigned char llc_oui_ipv6[] = { LLC, SNAP_ROUTED, ETHERTYPE_IPV6 };
+static const unsigned char llc_oui_ipv4[] = { LLC, SNAP_ROUTED, ETHERTYPE_IPV4 };
+static const unsigned char llc_oui_ipv6[] = { LLC, SNAP_ROUTED, ETHERTYPE_IPV6 };
enum br2684_encaps {
e_vc = BR2684_ENCAPS_VC,
@@ -217,8 +217,8 @@ static int br2684_xmit_vcc(struct sk_buff *skb, struct br2684_dev *brdev,
return 1;
}
-static inline struct br2684_vcc *pick_outgoing_vcc(struct sk_buff *skb,
- struct br2684_dev *brdev)
+static inline struct br2684_vcc *pick_outgoing_vcc(const struct sk_buff *skb,
+ const struct br2684_dev *brdev)
{
return list_empty(&brdev->brvccs) ? NULL : list_entry_brvcc(brdev->brvccs.next); /* 1 vcc/dev right now */
}
diff --git a/net/atm/common.c b/net/atm/common.c
index c865517ba449..d34edbe754c8 100644
--- a/net/atm/common.c
+++ b/net/atm/common.c
@@ -262,7 +262,7 @@ static int adjust_tp(struct atm_trafprm *tp,unsigned char aal)
}
-static int check_ci(struct atm_vcc *vcc, short vpi, int vci)
+static int check_ci(const struct atm_vcc *vcc, short vpi, int vci)
{
struct hlist_head *head = &vcc_hash[vci &
(VCC_HTABLE_SIZE - 1)];
@@ -290,7 +290,7 @@ static int check_ci(struct atm_vcc *vcc, short vpi, int vci)
}
-static int find_ci(struct atm_vcc *vcc, short *vpi, int *vci)
+static int find_ci(const struct atm_vcc *vcc, short *vpi, int *vci)
{
static short p; /* poor man's per-device cache */
static int c;
@@ -646,7 +646,7 @@ static int atm_change_qos(struct atm_vcc *vcc,struct atm_qos *qos)
}
-static int check_tp(struct atm_trafprm *tp)
+static int check_tp(const struct atm_trafprm *tp)
{
/* @@@ Should be merged with adjust_tp */
if (!tp->traffic_class || tp->traffic_class == ATM_ANYCLASS) return 0;
@@ -663,7 +663,7 @@ static int check_tp(struct atm_trafprm *tp)
}
-static int check_qos(struct atm_qos *qos)
+static int check_qos(const struct atm_qos *qos)
{
int error;
diff --git a/net/atm/lec.c b/net/atm/lec.c
index 653aca3573ac..5799fb52365a 100644
--- a/net/atm/lec.c
+++ b/net/atm/lec.c
@@ -65,36 +65,36 @@ static int lec_close(struct net_device *dev);
static struct net_device_stats *lec_get_stats(struct net_device *dev);
static void lec_init(struct net_device *dev);
static struct lec_arp_table *lec_arp_find(struct lec_priv *priv,
- unsigned char *mac_addr);
+ const unsigned char *mac_addr);
static int lec_arp_remove(struct lec_priv *priv,
struct lec_arp_table *to_remove);
/* LANE2 functions */
-static void lane2_associate_ind(struct net_device *dev, u8 *mac_address,
- u8 *tlvs, u32 sizeoftlvs);
-static int lane2_resolve(struct net_device *dev, u8 *dst_mac, int force,
+static void lane2_associate_ind(struct net_device *dev, const u8 *mac_address,
+ const u8 *tlvs, u32 sizeoftlvs);
+static int lane2_resolve(struct net_device *dev, const u8 *dst_mac, int force,
u8 **tlvs, u32 *sizeoftlvs);
-static int lane2_associate_req(struct net_device *dev, u8 *lan_dst,
- u8 *tlvs, u32 sizeoftlvs);
+static int lane2_associate_req(struct net_device *dev, const u8 *lan_dst,
+ const u8 *tlvs, u32 sizeoftlvs);
-static int lec_addr_delete(struct lec_priv *priv, unsigned char *atm_addr,
+static int lec_addr_delete(struct lec_priv *priv, const unsigned char *atm_addr,
unsigned long permanent);
static void lec_arp_check_empties(struct lec_priv *priv,
struct atm_vcc *vcc, struct sk_buff *skb);
static void lec_arp_destroy(struct lec_priv *priv);
static void lec_arp_init(struct lec_priv *priv);
static struct atm_vcc *lec_arp_resolve(struct lec_priv *priv,
- unsigned char *mac_to_find,
+ const unsigned char *mac_to_find,
int is_rdesc,
struct lec_arp_table **ret_entry);
-static void lec_arp_update(struct lec_priv *priv, unsigned char *mac_addr,
- unsigned char *atm_addr, unsigned long remoteflag,
+static void lec_arp_update(struct lec_priv *priv, const unsigned char *mac_addr,
+ const unsigned char *atm_addr, unsigned long remoteflag,
unsigned int targetless_le_arp);
static void lec_flush_complete(struct lec_priv *priv, unsigned long tran_id);
static int lec_mcast_make(struct lec_priv *priv, struct atm_vcc *vcc);
static void lec_set_flush_tran_id(struct lec_priv *priv,
- unsigned char *atm_addr,
+ const unsigned char *atm_addr,
unsigned long tran_id);
-static void lec_vcc_added(struct lec_priv *priv, struct atmlec_ioc *ioc_data,
+static void lec_vcc_added(struct lec_priv *priv, const struct atmlec_ioc *ioc_data,
struct atm_vcc *vcc,
void (*old_push) (struct atm_vcc *vcc,
struct sk_buff *skb));
@@ -634,7 +634,7 @@ static struct atm_dev lecatm_dev = {
*/
static int
send_to_lecd(struct lec_priv *priv, atmlec_msg_type type,
- unsigned char *mac_addr, unsigned char *atm_addr,
+ const unsigned char *mac_addr, const unsigned char *atm_addr,
struct sk_buff *data)
{
struct sock *sk;
@@ -705,10 +705,9 @@ static void lec_init(struct net_device *dev)
dev->set_multicast_list = lec_set_multicast_list;
dev->do_ioctl = NULL;
printk("%s: Initialized!\n", dev->name);
- return;
}
-static unsigned char lec_ctrl_magic[] = {
+static const unsigned char lec_ctrl_magic[] = {
0xff,
0x00,
0x01,
@@ -1276,7 +1275,7 @@ module_exit(lane_module_cleanup);
* lec will be used.
* If dst_mac == NULL, targetless LE_ARP will be sent
*/
-static int lane2_resolve(struct net_device *dev, u8 *dst_mac, int force,
+static int lane2_resolve(struct net_device *dev, const u8 *dst_mac, int force,
u8 **tlvs, u32 *sizeoftlvs)
{
unsigned long flags;
@@ -1322,8 +1321,8 @@ static int lane2_resolve(struct net_device *dev, u8 *dst_mac, int force,
* Returns 1 for success, 0 for failure (out of memory)
*
*/
-static int lane2_associate_req(struct net_device *dev, u8 *lan_dst,
- u8 *tlvs, u32 sizeoftlvs)
+static int lane2_associate_req(struct net_device *dev, const u8 *lan_dst,
+ const u8 *tlvs, u32 sizeoftlvs)
{
int retval;
struct sk_buff *skb;
@@ -1358,8 +1357,8 @@ static int lane2_associate_req(struct net_device *dev, u8 *lan_dst,
* LANE2: 3.1.5, LE_ASSOCIATE.indication
*
*/
-static void lane2_associate_ind(struct net_device *dev, u8 *mac_addr,
- u8 *tlvs, u32 sizeoftlvs)
+static void lane2_associate_ind(struct net_device *dev, const u8 *mac_addr,
+ const u8 *tlvs, u32 sizeoftlvs)
{
#if 0
int i = 0;
@@ -1744,7 +1743,7 @@ static void lec_arp_destroy(struct lec_priv *priv)
* Find entry by mac_address
*/
static struct lec_arp_table *lec_arp_find(struct lec_priv *priv,
- unsigned char *mac_addr)
+ const unsigned char *mac_addr)
{
struct hlist_node *node;
struct hlist_head *head;
@@ -1764,7 +1763,7 @@ static struct lec_arp_table *lec_arp_find(struct lec_priv *priv,
}
static struct lec_arp_table *make_entry(struct lec_priv *priv,
- unsigned char *mac_addr)
+ const unsigned char *mac_addr)
{
struct lec_arp_table *to_return;
@@ -1921,7 +1920,7 @@ restart:
*
*/
static struct atm_vcc *lec_arp_resolve(struct lec_priv *priv,
- unsigned char *mac_to_find, int is_rdesc,
+ const unsigned char *mac_to_find, int is_rdesc,
struct lec_arp_table **ret_entry)
{
unsigned long flags;
@@ -2017,7 +2016,7 @@ out:
}
static int
-lec_addr_delete(struct lec_priv *priv, unsigned char *atm_addr,
+lec_addr_delete(struct lec_priv *priv, const unsigned char *atm_addr,
unsigned long permanent)
{
unsigned long flags;
@@ -2047,8 +2046,8 @@ lec_addr_delete(struct lec_priv *priv, unsigned char *atm_addr,
* Notifies: Response to arp_request (atm_addr != NULL)
*/
static void
-lec_arp_update(struct lec_priv *priv, unsigned char *mac_addr,
- unsigned char *atm_addr, unsigned long remoteflag,
+lec_arp_update(struct lec_priv *priv, const unsigned char *mac_addr,
+ const unsigned char *atm_addr, unsigned long remoteflag,
unsigned int targetless_le_arp)
{
unsigned long flags;
@@ -2148,7 +2147,7 @@ out:
* Notifies: Vcc setup ready
*/
static void
-lec_vcc_added(struct lec_priv *priv, struct atmlec_ioc *ioc_data,
+lec_vcc_added(struct lec_priv *priv, const struct atmlec_ioc *ioc_data,
struct atm_vcc *vcc,
void (*old_push) (struct atm_vcc *vcc, struct sk_buff *skb))
{
@@ -2336,7 +2335,7 @@ restart:
static void
lec_set_flush_tran_id(struct lec_priv *priv,
- unsigned char *atm_addr, unsigned long tran_id)
+ const unsigned char *atm_addr, unsigned long tran_id)
{
unsigned long flags;
struct hlist_node *node;
diff --git a/net/atm/lec.h b/net/atm/lec.h
index b41cda7ea1e1..0d376682c1a3 100644
--- a/net/atm/lec.h
+++ b/net/atm/lec.h
@@ -42,12 +42,12 @@ struct lecdatahdr_8025 {
*
*/
struct lane2_ops {
- int (*resolve) (struct net_device *dev, u8 *dst_mac, int force,
+ int (*resolve) (struct net_device *dev, const u8 *dst_mac, int force,
u8 **tlvs, u32 *sizeoftlvs);
- int (*associate_req) (struct net_device *dev, u8 *lan_dst,
- u8 *tlvs, u32 sizeoftlvs);
- void (*associate_indicator) (struct net_device *dev, u8 *mac_addr,
- u8 *tlvs, u32 sizeoftlvs);
+ int (*associate_req) (struct net_device *dev, const u8 *lan_dst,
+ const u8 *tlvs, u32 sizeoftlvs);
+ void (*associate_indicator) (struct net_device *dev, const u8 *mac_addr,
+ const u8 *tlvs, u32 sizeoftlvs);
};
/*
diff --git a/net/ax25/af_ax25.c b/net/ax25/af_ax25.c
index 2712544cf0ca..97eaa23ad9ea 100644
--- a/net/ax25/af_ax25.c
+++ b/net/ax25/af_ax25.c
@@ -893,13 +893,11 @@ struct sock *ax25_make_new(struct sock *osk, struct ax25_dev *ax25_dev)
sk->sk_destruct = ax25_free_sock;
sk->sk_type = osk->sk_type;
- sk->sk_socket = osk->sk_socket;
sk->sk_priority = osk->sk_priority;
sk->sk_protocol = osk->sk_protocol;
sk->sk_rcvbuf = osk->sk_rcvbuf;
sk->sk_sndbuf = osk->sk_sndbuf;
sk->sk_state = TCP_ESTABLISHED;
- sk->sk_sleep = osk->sk_sleep;
sock_copy_flags(sk, osk);
oax25 = ax25_sk(osk);
@@ -1361,13 +1359,11 @@ static int ax25_accept(struct socket *sock, struct socket *newsock, int flags)
goto out;
newsk = skb->sk;
- newsk->sk_socket = newsock;
- newsk->sk_sleep = &newsock->wait;
+ sock_graft(newsk, newsock);
/* Now attach up the new socket */
kfree_skb(skb);
sk->sk_ack_backlog--;
- newsock->sk = newsk;
newsock->state = SS_CONNECTED;
out:
diff --git a/net/ax25/ax25_std_timer.c b/net/ax25/ax25_std_timer.c
index 96e4b9273250..cdc7e751ef36 100644
--- a/net/ax25/ax25_std_timer.c
+++ b/net/ax25/ax25_std_timer.c
@@ -39,11 +39,9 @@ void ax25_std_heartbeat_expiry(ax25_cb *ax25)
switch (ax25->state) {
case AX25_STATE_0:
- /* Magic here: If we listen() and a new link dies before it
- is accepted() it isn't 'dead' so doesn't get removed. */
- if (!sk || sock_flag(sk, SOCK_DESTROY) ||
- (sk->sk_state == TCP_LISTEN &&
- sock_flag(sk, SOCK_DEAD))) {
+ if (!sk ||
+ sock_flag(sk, SOCK_DESTROY) ||
+ sock_flag(sk, SOCK_DEAD)) {
if (sk) {
sock_hold(sk);
ax25_destroy_socket(ax25);
diff --git a/net/bluetooth/bnep/bnep.h b/net/bluetooth/bnep/bnep.h
index e69244dd8de8..b69bf4e7c48b 100644
--- a/net/bluetooth/bnep/bnep.h
+++ b/net/bluetooth/bnep/bnep.h
@@ -16,10 +16,6 @@
Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*/
-/*
- * $Id: bnep.h,v 1.5 2002/08/04 21:23:58 maxk Exp $
- */
-
#ifndef _BNEP_H
#define _BNEP_H
diff --git a/net/bluetooth/bnep/core.c b/net/bluetooth/bnep/core.c
index 24e91eb7f649..021172c0e666 100644
--- a/net/bluetooth/bnep/core.c
+++ b/net/bluetooth/bnep/core.c
@@ -25,10 +25,6 @@
SOFTWARE IS DISCLAIMED.
*/
-/*
- * $Id: core.c,v 1.20 2002/08/04 21:23:58 maxk Exp $
- */
-
#include <linux/module.h>
#include <linux/kernel.h>
diff --git a/net/bluetooth/bnep/netdev.c b/net/bluetooth/bnep/netdev.c
index 95e3837e4312..d9fa0ab2c87f 100644
--- a/net/bluetooth/bnep/netdev.c
+++ b/net/bluetooth/bnep/netdev.c
@@ -25,10 +25,6 @@
SOFTWARE IS DISCLAIMED.
*/
-/*
- * $Id: netdev.c,v 1.8 2002/08/04 21:23:58 maxk Exp $
- */
-
#include <linux/module.h>
#include <linux/socket.h>
diff --git a/net/bluetooth/bnep/sock.c b/net/bluetooth/bnep/sock.c
index 201e5b1ce473..8ffb57f2303a 100644
--- a/net/bluetooth/bnep/sock.c
+++ b/net/bluetooth/bnep/sock.c
@@ -24,10 +24,6 @@
SOFTWARE IS DISCLAIMED.
*/
-/*
- * $Id: sock.c,v 1.4 2002/08/04 21:23:58 maxk Exp $
- */
-
#include <linux/module.h>
#include <linux/types.h>
diff --git a/net/bluetooth/rfcomm/core.c b/net/bluetooth/rfcomm/core.c
index b6b3d9b4066f..6cfc7ba611b3 100644
--- a/net/bluetooth/rfcomm/core.c
+++ b/net/bluetooth/rfcomm/core.c
@@ -23,8 +23,6 @@
/*
* Bluetooth RFCOMM core.
- *
- * $Id: core.c,v 1.42 2002/10/01 23:26:25 maxk Exp $
*/
#include <linux/module.h>
diff --git a/net/bluetooth/rfcomm/sock.c b/net/bluetooth/rfcomm/sock.c
index c3ed076481d8..8a972b6ba85f 100644
--- a/net/bluetooth/rfcomm/sock.c
+++ b/net/bluetooth/rfcomm/sock.c
@@ -23,8 +23,6 @@
/*
* RFCOMM sockets.
- *
- * $Id: sock.c,v 1.24 2002/10/03 01:00:34 maxk Exp $
*/
#include <linux/module.h>
diff --git a/net/bluetooth/rfcomm/tty.c b/net/bluetooth/rfcomm/tty.c
index ec22ebe0c2c8..5d163571d3f7 100644
--- a/net/bluetooth/rfcomm/tty.c
+++ b/net/bluetooth/rfcomm/tty.c
@@ -23,8 +23,6 @@
/*
* RFCOMM TTY.
- *
- * $Id: tty.c,v 1.24 2002/10/03 01:54:38 holtmann Exp $
*/
#include <linux/module.h>
diff --git a/net/bridge/Kconfig b/net/bridge/Kconfig
index 12265aff7099..e143ca678881 100644
--- a/net/bridge/Kconfig
+++ b/net/bridge/Kconfig
@@ -5,6 +5,7 @@
config BRIDGE
tristate "802.1d Ethernet Bridging"
select LLC
+ select STP
---help---
If you say Y here, then your Linux box will be able to act as an
Ethernet bridge, which means that the different Ethernet segments it
diff --git a/net/bridge/br.c b/net/bridge/br.c
index 8f3c58e5f7a5..573acdf6f9ff 100644
--- a/net/bridge/br.c
+++ b/net/bridge/br.c
@@ -5,8 +5,6 @@
* Authors:
* Lennert Buytenhek <buytenh@gnu.org>
*
- * $Id: br.c,v 1.47 2001/12/24 00:56:41 davem Exp $
- *
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
* as published by the Free Software Foundation; either version
@@ -20,21 +18,24 @@
#include <linux/init.h>
#include <linux/llc.h>
#include <net/llc.h>
+#include <net/stp.h>
#include "br_private.h"
int (*br_should_route_hook)(struct sk_buff *skb);
-static struct llc_sap *br_stp_sap;
+static const struct stp_proto br_stp_proto = { /* registered in br_init(), unregistered in br_deinit() */
+ .rcv = br_stp_rcv, /* receive handler declared in br_private.h */
+};
static int __init br_init(void)
{
int err;
- br_stp_sap = llc_sap_open(LLC_SAP_BSPAN, br_stp_rcv);
- if (!br_stp_sap) {
+ err = stp_proto_register(&br_stp_proto);
+ if (err < 0) {
printk(KERN_ERR "bridge: can't register sap for STP\n");
- return -EADDRINUSE;
+ return err;
}
err = br_fdb_init();
@@ -67,13 +68,13 @@ err_out2:
err_out1:
br_fdb_fini();
err_out:
- llc_sap_put(br_stp_sap);
+ stp_proto_unregister(&br_stp_proto);
return err;
}
static void __exit br_deinit(void)
{
- rcu_assign_pointer(br_stp_sap->rcv_func, NULL);
+ stp_proto_unregister(&br_stp_proto);
br_netlink_fini();
unregister_netdevice_notifier(&br_device_notifier);
@@ -84,7 +85,6 @@ static void __exit br_deinit(void)
synchronize_net();
br_netfilter_fini();
- llc_sap_put(br_stp_sap);
br_fdb_get_hook = NULL;
br_fdb_put_hook = NULL;
diff --git a/net/bridge/br_device.c b/net/bridge/br_device.c
index bf7787395fe0..d9449df7cad5 100644
--- a/net/bridge/br_device.c
+++ b/net/bridge/br_device.c
@@ -5,8 +5,6 @@
* Authors:
* Lennert Buytenhek <buytenh@gnu.org>
*
- * $Id: br_device.c,v 1.6 2001/12/24 00:59:55 davem Exp $
- *
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
* as published by the Free Software Foundation; either version
@@ -21,12 +19,6 @@
#include <asm/uaccess.h>
#include "br_private.h"
-static struct net_device_stats *br_dev_get_stats(struct net_device *dev)
-{
- struct net_bridge *br = netdev_priv(dev);
- return &br->statistics;
-}
-
/* net device transmit always called with no BH (preempt_disabled) */
int br_dev_xmit(struct sk_buff *skb, struct net_device *dev)
{
@@ -34,8 +26,8 @@ int br_dev_xmit(struct sk_buff *skb, struct net_device *dev)
const unsigned char *dest = skb->data;
struct net_bridge_fdb_entry *dst;
- br->statistics.tx_packets++;
- br->statistics.tx_bytes += skb->len;
+ dev->stats.tx_packets++;
+ dev->stats.tx_bytes += skb->len;
skb_reset_mac_header(skb);
skb_pull(skb, ETH_HLEN);
@@ -95,6 +87,7 @@ static int br_set_mac_address(struct net_device *dev, void *p)
spin_lock_bh(&br->lock);
memcpy(dev->dev_addr, addr->sa_data, ETH_ALEN);
br_stp_change_bridge_id(br, addr->sa_data);
+ br->flags |= BR_SET_MAC_ADDR;
spin_unlock_bh(&br->lock);
return 0;
@@ -161,7 +154,6 @@ void br_dev_setup(struct net_device *dev)
ether_setup(dev);
dev->do_ioctl = br_dev_ioctl;
- dev->get_stats = br_dev_get_stats;
dev->hard_start_xmit = br_dev_xmit;
dev->open = br_dev_open;
dev->set_multicast_list = br_dev_set_multicast_list;
diff --git a/net/bridge/br_fdb.c b/net/bridge/br_fdb.c
index 72c5976a5ce3..a48f5efdb6bf 100644
--- a/net/bridge/br_fdb.c
+++ b/net/bridge/br_fdb.c
@@ -5,8 +5,6 @@
* Authors:
* Lennert Buytenhek <buytenh@gnu.org>
*
- * $Id: br_fdb.c,v 1.6 2002/01/17 00:57:07 davem Exp $
- *
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
* as published by the Free Software Foundation; either version
@@ -15,6 +13,7 @@
#include <linux/kernel.h>
#include <linux/init.h>
+#include <linux/rculist.h>
#include <linux/spinlock.h>
#include <linux/times.h>
#include <linux/netdevice.h>
diff --git a/net/bridge/br_forward.c b/net/bridge/br_forward.c
index bdd7c35c3c7b..bdd9ccea17ce 100644
--- a/net/bridge/br_forward.c
+++ b/net/bridge/br_forward.c
@@ -5,8 +5,6 @@
* Authors:
* Lennert Buytenhek <buytenh@gnu.org>
*
- * $Id: br_forward.c,v 1.4 2001/08/14 22:05:57 davem Exp $
- *
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
* as published by the Free Software Foundation; either version
@@ -91,7 +89,7 @@ void br_deliver(const struct net_bridge_port *to, struct sk_buff *skb)
/* called with rcu_read_lock */
void br_forward(const struct net_bridge_port *to, struct sk_buff *skb)
{
- if (should_deliver(to, skb)) {
+ if (!skb_warn_if_lro(skb) && should_deliver(to, skb)) {
__br_forward(to, skb);
return;
}
@@ -115,7 +113,7 @@ static void br_flood(struct net_bridge *br, struct sk_buff *skb,
struct sk_buff *skb2;
if ((skb2 = skb_clone(skb, GFP_ATOMIC)) == NULL) {
- br->statistics.tx_dropped++;
+ br->dev->stats.tx_dropped++;
kfree_skb(skb);
return;
}
diff --git a/net/bridge/br_if.c b/net/bridge/br_if.c
index c2397f503b0f..a072ea5ca6f5 100644
--- a/net/bridge/br_if.c
+++ b/net/bridge/br_if.c
@@ -5,8 +5,6 @@
* Authors:
* Lennert Buytenhek <buytenh@gnu.org>
*
- * $Id: br_if.c,v 1.7 2001/12/24 00:59:55 davem Exp $
- *
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
* as published by the Free Software Foundation; either version
@@ -375,6 +373,10 @@ int br_add_if(struct net_bridge *br, struct net_device *dev)
if (IS_ERR(p))
return PTR_ERR(p);
+ err = dev_set_promiscuity(dev, 1);
+ if (err)
+ goto put_back;
+
err = kobject_init_and_add(&p->kobj, &brport_ktype, &(dev->dev.kobj),
SYSFS_BRIDGE_PORT_ATTR);
if (err)
@@ -389,7 +391,7 @@ int br_add_if(struct net_bridge *br, struct net_device *dev)
goto err2;
rcu_assign_pointer(dev->br_port, p);
- dev_set_promiscuity(dev, 1);
+ dev_disable_lro(dev);
list_add_rcu(&p->list, &br->port_list);
@@ -413,12 +415,12 @@ err2:
br_fdb_delete_by_port(br, p, 1);
err1:
kobject_del(&p->kobj);
- goto put_back;
err0:
kobject_put(&p->kobj);
-
+ dev_set_promiscuity(dev, -1);
put_back:
dev_put(dev);
+ kfree(p);
return err;
}
@@ -442,12 +444,16 @@ int br_del_if(struct net_bridge *br, struct net_device *dev)
void __exit br_cleanup_bridges(void)
{
- struct net_device *dev, *nxt;
+ struct net_device *dev;
rtnl_lock();
- for_each_netdev_safe(&init_net, dev, nxt)
- if (dev->priv_flags & IFF_EBRIDGE)
+restart:
+ for_each_netdev(&init_net, dev) {
+ if (dev->priv_flags & IFF_EBRIDGE) { /* only devices flagged as bridges are deleted */
del_br(dev->priv);
+ goto restart; /* rescan from the list head after every deletion */
+ }
+ }
rtnl_unlock();
}
diff --git a/net/bridge/br_input.c b/net/bridge/br_input.c
index 255c00f60ce7..30b88777c3df 100644
--- a/net/bridge/br_input.c
+++ b/net/bridge/br_input.c
@@ -5,8 +5,6 @@
* Authors:
* Lennert Buytenhek <buytenh@gnu.org>
*
- * $Id: br_input.c,v 1.10 2001/12/24 04:50:20 davem Exp $
- *
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
* as published by the Free Software Foundation; either version
@@ -24,13 +22,13 @@ const u8 br_group_address[ETH_ALEN] = { 0x01, 0x80, 0xc2, 0x00, 0x00, 0x00 };
static void br_pass_frame_up(struct net_bridge *br, struct sk_buff *skb)
{
- struct net_device *indev;
+ struct net_device *indev, *brdev = br->dev;
- br->statistics.rx_packets++;
- br->statistics.rx_bytes += skb->len;
+ brdev->stats.rx_packets++;
+ brdev->stats.rx_bytes += skb->len;
indev = skb->dev;
- skb->dev = br->dev;
+ skb->dev = brdev;
NF_HOOK(PF_BRIDGE, NF_BR_LOCAL_IN, skb, indev, NULL,
netif_receive_skb);
@@ -64,7 +62,7 @@ int br_handle_frame_finish(struct sk_buff *skb)
dst = NULL;
if (is_multicast_ether_addr(dest)) {
- br->statistics.multicast++;
+ br->dev->stats.multicast++;
skb2 = skb;
} else if ((dst = __br_fdb_get(br, dest)) && dst->is_local) {
skb2 = skb;
@@ -136,14 +134,11 @@ struct sk_buff *br_handle_frame(struct net_bridge_port *p, struct sk_buff *skb)
if (skb->protocol == htons(ETH_P_PAUSE))
goto drop;
- /* Process STP BPDU's through normal netif_receive_skb() path */
- if (p->br->stp_enabled != BR_NO_STP) {
- if (NF_HOOK(PF_BRIDGE, NF_BR_LOCAL_IN, skb, skb->dev,
- NULL, br_handle_local_finish))
- return NULL;
- else
- return skb;
- }
+ if (NF_HOOK(PF_BRIDGE, NF_BR_LOCAL_IN, skb, skb->dev,
+ NULL, br_handle_local_finish))
+ return NULL; /* frame consumed by filter */
+ else
+ return skb; /* continue processing */
}
switch (p->state) {
diff --git a/net/bridge/br_ioctl.c b/net/bridge/br_ioctl.c
index 0655a5f07f58..eeee218eed80 100644
--- a/net/bridge/br_ioctl.c
+++ b/net/bridge/br_ioctl.c
@@ -5,8 +5,6 @@
* Authors:
* Lennert Buytenhek <buytenh@gnu.org>
*
- * $Id: br_ioctl.c,v 1.4 2000/11/08 05:16:40 davem Exp $
- *
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
* as published by the Free Software Foundation; either version
diff --git a/net/bridge/br_notify.c b/net/bridge/br_notify.c
index 00644a544e3c..88d8ec7b3142 100644
--- a/net/bridge/br_notify.c
+++ b/net/bridge/br_notify.c
@@ -5,8 +5,6 @@
* Authors:
* Lennert Buytenhek <buytenh@gnu.org>
*
- * $Id: br_notify.c,v 1.2 2000/02/21 15:51:34 davem Exp $
- *
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
* as published by the Free Software Foundation; either version
diff --git a/net/bridge/br_private.h b/net/bridge/br_private.h
index c11b554fd109..815ed38925b2 100644
--- a/net/bridge/br_private.h
+++ b/net/bridge/br_private.h
@@ -4,8 +4,6 @@
* Authors:
* Lennert Buytenhek <buytenh@gnu.org>
*
- * $Id: br_private.h,v 1.7 2001/12/24 00:59:55 davem Exp $
- *
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
* as published by the Free Software Foundation; either version
@@ -90,11 +88,12 @@ struct net_bridge
spinlock_t lock;
struct list_head port_list;
struct net_device *dev;
- struct net_device_stats statistics;
spinlock_t hash_lock;
struct hlist_head hash[BR_HASH_SIZE];
struct list_head age_list;
unsigned long feature_mask;
+ unsigned long flags;
+#define BR_SET_MAC_ADDR 0x00000001
/* STP */
bridge_id designated_root;
@@ -227,8 +226,9 @@ extern void br_stp_set_path_cost(struct net_bridge_port *p,
extern ssize_t br_show_bridge_id(char *buf, const struct bridge_id *id);
/* br_stp_bpdu.c */
-extern int br_stp_rcv(struct sk_buff *skb, struct net_device *dev,
- struct packet_type *pt, struct net_device *orig_dev);
+struct stp_proto;
+extern void br_stp_rcv(const struct stp_proto *proto, struct sk_buff *skb,
+ struct net_device *dev);
/* br_stp_timer.c */
extern void br_stp_timer_init(struct net_bridge *br);
diff --git a/net/bridge/br_private_stp.h b/net/bridge/br_private_stp.h
index e29f01ac1adf..8b650f7fbfa0 100644
--- a/net/bridge/br_private_stp.h
+++ b/net/bridge/br_private_stp.h
@@ -4,8 +4,6 @@
* Authors:
* Lennert Buytenhek <buytenh@gnu.org>
*
- * $Id: br_private_stp.h,v 1.3 2001/02/05 06:03:47 davem Exp $
- *
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
* as published by the Free Software Foundation; either version
diff --git a/net/bridge/br_stp.c b/net/bridge/br_stp.c
index e38034aa56f5..921bbe5cb94a 100644
--- a/net/bridge/br_stp.c
+++ b/net/bridge/br_stp.c
@@ -5,14 +5,13 @@
* Authors:
* Lennert Buytenhek <buytenh@gnu.org>
*
- * $Id: br_stp.c,v 1.4 2000/06/19 10:13:35 davem Exp $
- *
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
* as published by the Free Software Foundation; either version
* 2 of the License, or (at your option) any later version.
*/
#include <linux/kernel.h>
+#include <linux/rculist.h>
#include "br_private.h"
#include "br_private_stp.h"
diff --git a/net/bridge/br_stp_bpdu.c b/net/bridge/br_stp_bpdu.c
index ddeb6e5d45d6..996476174517 100644
--- a/net/bridge/br_stp_bpdu.c
+++ b/net/bridge/br_stp_bpdu.c
@@ -5,8 +5,6 @@
* Authors:
* Lennert Buytenhek <buytenh@gnu.org>
*
- * $Id: br_stp_bpdu.c,v 1.3 2001/11/10 02:35:25 davem Exp $
- *
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
* as published by the Free Software Foundation; either version
@@ -20,6 +18,7 @@
#include <net/net_namespace.h>
#include <net/llc.h>
#include <net/llc_pdu.h>
+#include <net/stp.h>
#include <asm/unaligned.h>
#include "br_private.h"
@@ -133,10 +132,9 @@ void br_send_tcn_bpdu(struct net_bridge_port *p)
*
* NO locks, but rcu_read_lock (preempt_disabled)
*/
-int br_stp_rcv(struct sk_buff *skb, struct net_device *dev,
- struct packet_type *pt, struct net_device *orig_dev)
+void br_stp_rcv(const struct stp_proto *proto, struct sk_buff *skb,
+ struct net_device *dev)
{
- const struct llc_pdu_un *pdu = llc_pdu_un_hdr(skb);
const unsigned char *dest = eth_hdr(skb)->h_dest;
struct net_bridge_port *p = rcu_dereference(dev->br_port);
struct net_bridge *br;
@@ -148,11 +146,6 @@ int br_stp_rcv(struct sk_buff *skb, struct net_device *dev,
if (!p)
goto err;
- if (pdu->ssap != LLC_SAP_BSPAN
- || pdu->dsap != LLC_SAP_BSPAN
- || pdu->ctrl_1 != LLC_PDU_TYPE_U)
- goto err;
-
if (!pskb_may_pull(skb, 4))
goto err;
@@ -226,5 +219,4 @@ int br_stp_rcv(struct sk_buff *skb, struct net_device *dev,
spin_unlock(&br->lock);
err:
kfree_skb(skb);
- return 0;
}
diff --git a/net/bridge/br_stp_if.c b/net/bridge/br_stp_if.c
index 1a430eccec9b..9a52ac5b4525 100644
--- a/net/bridge/br_stp_if.c
+++ b/net/bridge/br_stp_if.c
@@ -5,8 +5,6 @@
* Authors:
* Lennert Buytenhek <buytenh@gnu.org>
*
- * $Id: br_stp_if.c,v 1.4 2001/04/14 21:14:39 davem Exp $
- *
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
* as published by the Free Software Foundation; either version
@@ -216,6 +214,10 @@ void br_stp_recalculate_bridge_id(struct net_bridge *br)
const unsigned char *addr = br_mac_zero;
struct net_bridge_port *p;
+ /* user has chosen a value so keep it */
+ if (br->flags & BR_SET_MAC_ADDR)
+ return;
+
list_for_each_entry(p, &br->port_list, list) {
if (addr == br_mac_zero ||
memcmp(p->dev->dev_addr, addr, ETH_ALEN) < 0)
diff --git a/net/bridge/br_stp_timer.c b/net/bridge/br_stp_timer.c
index 77f5255e6915..772a140bfdf0 100644
--- a/net/bridge/br_stp_timer.c
+++ b/net/bridge/br_stp_timer.c
@@ -5,8 +5,6 @@
* Authors:
* Lennert Buytenhek <buytenh@gnu.org>
*
- * $Id: br_stp_timer.c,v 1.3 2000/05/05 02:17:17 davem Exp $
- *
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
* as published by the Free Software Foundation; either version
diff --git a/net/bridge/netfilter/Kconfig b/net/bridge/netfilter/Kconfig
index 7beeefa0f9c0..909479794999 100644
--- a/net/bridge/netfilter/Kconfig
+++ b/net/bridge/netfilter/Kconfig
@@ -83,6 +83,15 @@ config BRIDGE_EBT_IP
To compile it as a module, choose M here. If unsure, say N.
+config BRIDGE_EBT_IP6
+ tristate "ebt: IP6 filter support"
+ depends on BRIDGE_NF_EBTABLES && IPV6
+ help
+ This option adds the IP6 match, which allows basic IPV6 header field
+ filtering.
+
+ To compile it as a module, choose M here. If unsure, say N.
+
config BRIDGE_EBT_LIMIT
tristate "ebt: limit match support"
depends on BRIDGE_NF_EBTABLES
@@ -221,7 +230,7 @@ config BRIDGE_EBT_NFLOG
either the old LOG target, the old ULOG target or nfnetlink_log
as backend.
- This option adds the ulog watcher, that you can use in any rule
+ This option adds the nflog watcher, that you can use in any rule
in any ebtables table.
To compile it as a module, choose M here. If unsure, say N.
diff --git a/net/bridge/netfilter/Makefile b/net/bridge/netfilter/Makefile
index 83715d73a503..0718699540b0 100644
--- a/net/bridge/netfilter/Makefile
+++ b/net/bridge/netfilter/Makefile
@@ -14,6 +14,7 @@ obj-$(CONFIG_BRIDGE_EBT_802_3) += ebt_802_3.o
obj-$(CONFIG_BRIDGE_EBT_AMONG) += ebt_among.o
obj-$(CONFIG_BRIDGE_EBT_ARP) += ebt_arp.o
obj-$(CONFIG_BRIDGE_EBT_IP) += ebt_ip.o
+obj-$(CONFIG_BRIDGE_EBT_IP6) += ebt_ip6.o
obj-$(CONFIG_BRIDGE_EBT_LIMIT) += ebt_limit.o
obj-$(CONFIG_BRIDGE_EBT_MARK) += ebt_mark_m.o
obj-$(CONFIG_BRIDGE_EBT_PKTTYPE) += ebt_pkttype.o
diff --git a/net/bridge/netfilter/ebt_ip6.c b/net/bridge/netfilter/ebt_ip6.c
new file mode 100644
index 000000000000..36efb3a75249
--- /dev/null
+++ b/net/bridge/netfilter/ebt_ip6.c
@@ -0,0 +1,144 @@
+/*
+ * ebt_ip6
+ *
+ * Authors:
+ * Manohar Castelino <manohar.r.castelino@intel.com>
+ * Kuo-Lang Tseng <kuo-lang.tseng@intel.com>
+ * Jan Engelhardt <jengelh@computergmbh.de>
+ *
+ * Summary:
+ * This is just a modification of the IPv4 code written by
+ * Bart De Schuymer <bdschuym@pandora.be>
+ * with the changes required to support IPv6
+ *
+ * Jan, 2008
+ */
+
+#include <linux/netfilter_bridge/ebtables.h>
+#include <linux/netfilter_bridge/ebt_ip6.h>
+#include <linux/ipv6.h>
+#include <net/ipv6.h>
+#include <linux/in.h>
+#include <linux/module.h>
+#include <net/dsfield.h>
+
+struct tcpudphdr {
+ __be16 src;
+ __be16 dst;
+};
+
+static int ebt_filter_ip6(const struct sk_buff *skb,
+ const struct net_device *in,
+ const struct net_device *out, const void *data,
+ unsigned int datalen)
+{
+ const struct ebt_ip6_info *info = (struct ebt_ip6_info *)data;
+ const struct ipv6hdr *ih6;
+ struct ipv6hdr _ip6h;
+ const struct tcpudphdr *pptr;
+ struct tcpudphdr _ports;
+ struct in6_addr tmp_addr;
+ int i;
+
+ ih6 = skb_header_pointer(skb, 0, sizeof(_ip6h), &_ip6h);
+ if (ih6 == NULL)
+ return EBT_NOMATCH;
+ if (info->bitmask & EBT_IP6_TCLASS &&
+ FWINV(info->tclass != ipv6_get_dsfield(ih6), EBT_IP6_TCLASS))
+ return EBT_NOMATCH;
+ for (i = 0; i < 4; i++)
+ tmp_addr.in6_u.u6_addr32[i] = ih6->saddr.in6_u.u6_addr32[i] &
+ info->smsk.in6_u.u6_addr32[i];
+ if (info->bitmask & EBT_IP6_SOURCE &&
+ FWINV((ipv6_addr_cmp(&tmp_addr, &info->saddr) != 0),
+ EBT_IP6_SOURCE))
+ return EBT_NOMATCH;
+ for (i = 0; i < 4; i++)
+ tmp_addr.in6_u.u6_addr32[i] = ih6->daddr.in6_u.u6_addr32[i] &
+ info->dmsk.in6_u.u6_addr32[i];
+ if (info->bitmask & EBT_IP6_DEST &&
+ FWINV((ipv6_addr_cmp(&tmp_addr, &info->daddr) != 0), EBT_IP6_DEST))
+ return EBT_NOMATCH;
+ if (info->bitmask & EBT_IP6_PROTO) {
+ uint8_t nexthdr = ih6->nexthdr;
+ int offset_ph;
+
+ offset_ph = ipv6_skip_exthdr(skb, sizeof(_ip6h), &nexthdr);
+ if (offset_ph == -1)
+ return EBT_NOMATCH;
+ if (FWINV(info->protocol != nexthdr, EBT_IP6_PROTO))
+ return EBT_NOMATCH;
+ if (!(info->bitmask & EBT_IP6_DPORT) &&
+ !(info->bitmask & EBT_IP6_SPORT))
+ return EBT_MATCH;
+ pptr = skb_header_pointer(skb, offset_ph, sizeof(_ports),
+ &_ports);
+ if (pptr == NULL)
+ return EBT_NOMATCH;
+ if (info->bitmask & EBT_IP6_DPORT) {
+ u32 dst = ntohs(pptr->dst);
+ if (FWINV(dst < info->dport[0] ||
+ dst > info->dport[1], EBT_IP6_DPORT))
+ return EBT_NOMATCH;
+ }
+ if (info->bitmask & EBT_IP6_SPORT) {
+ u32 src = ntohs(pptr->src);
+ if (FWINV(src < info->sport[0] ||
+ src > info->sport[1], EBT_IP6_SPORT))
+ return EBT_NOMATCH;
+ }
+ return EBT_MATCH;
+ }
+ return EBT_MATCH;
+}
+
+static int ebt_ip6_check(const char *tablename, unsigned int hookmask,
+ const struct ebt_entry *e, void *data, unsigned int datalen)
+{
+ struct ebt_ip6_info *info = (struct ebt_ip6_info *)data;
+
+ if (datalen != EBT_ALIGN(sizeof(struct ebt_ip6_info)))
+ return -EINVAL;
+ if (e->ethproto != htons(ETH_P_IPV6) || e->invflags & EBT_IPROTO)
+ return -EINVAL;
+ if (info->bitmask & ~EBT_IP6_MASK || info->invflags & ~EBT_IP6_MASK)
+ return -EINVAL;
+ if (info->bitmask & (EBT_IP6_DPORT | EBT_IP6_SPORT)) {
+ if (info->invflags & EBT_IP6_PROTO)
+ return -EINVAL;
+ if (info->protocol != IPPROTO_TCP &&
+ info->protocol != IPPROTO_UDP &&
+ info->protocol != IPPROTO_UDPLITE &&
+ info->protocol != IPPROTO_SCTP &&
+ info->protocol != IPPROTO_DCCP)
+ return -EINVAL;
+ }
+ if (info->bitmask & EBT_IP6_DPORT && info->dport[0] > info->dport[1])
+ return -EINVAL;
+ if (info->bitmask & EBT_IP6_SPORT && info->sport[0] > info->sport[1])
+ return -EINVAL;
+ return 0;
+}
+
+static struct ebt_match filter_ip6 =
+{
+ .name = EBT_IP6_MATCH,
+ .match = ebt_filter_ip6,
+ .check = ebt_ip6_check,
+ .me = THIS_MODULE,
+};
+
+static int __init ebt_ip6_init(void)
+{
+ return ebt_register_match(&filter_ip6);
+}
+
+static void __exit ebt_ip6_fini(void)
+{
+ ebt_unregister_match(&filter_ip6);
+}
+
+module_init(ebt_ip6_init);
+module_exit(ebt_ip6_fini);
+MODULE_DESCRIPTION("Ebtables: IPv6 protocol packet match");
+MODULE_LICENSE("GPL");
diff --git a/net/bridge/netfilter/ebt_log.c b/net/bridge/netfilter/ebt_log.c
index 0b209e4aad0a..2f430d4ae911 100644
--- a/net/bridge/netfilter/ebt_log.c
+++ b/net/bridge/netfilter/ebt_log.c
@@ -18,6 +18,9 @@
#include <linux/if_arp.h>
#include <linux/spinlock.h>
#include <net/netfilter/nf_log.h>
+#include <linux/ipv6.h>
+#include <net/ipv6.h>
+#include <linux/in6.h>
static DEFINE_SPINLOCK(ebt_log_lock);
@@ -58,6 +61,27 @@ static void print_MAC(const unsigned char *p)
printk("%02x%c", *p, i == ETH_ALEN - 1 ? ' ':':');
}
+static void
+print_ports(const struct sk_buff *skb, uint8_t protocol, int offset)
+{
+ if (protocol == IPPROTO_TCP ||
+ protocol == IPPROTO_UDP ||
+ protocol == IPPROTO_UDPLITE ||
+ protocol == IPPROTO_SCTP ||
+ protocol == IPPROTO_DCCP) {
+ const struct tcpudphdr *pptr;
+ struct tcpudphdr _ports;
+
+ pptr = skb_header_pointer(skb, offset,
+ sizeof(_ports), &_ports);
+ if (pptr == NULL) {
+ printk(" INCOMPLETE TCP/UDP header");
+ return;
+ }
+ printk(" SPT=%u DPT=%u", ntohs(pptr->src), ntohs(pptr->dst));
+ }
+}
+
#define myNIPQUAD(a) a[0], a[1], a[2], a[3]
static void
ebt_log_packet(unsigned int pf, unsigned int hooknum,
@@ -95,25 +119,35 @@ ebt_log_packet(unsigned int pf, unsigned int hooknum,
printk(" IP SRC=%u.%u.%u.%u IP DST=%u.%u.%u.%u, IP "
"tos=0x%02X, IP proto=%d", NIPQUAD(ih->saddr),
NIPQUAD(ih->daddr), ih->tos, ih->protocol);
- if (ih->protocol == IPPROTO_TCP ||
- ih->protocol == IPPROTO_UDP ||
- ih->protocol == IPPROTO_UDPLITE ||
- ih->protocol == IPPROTO_SCTP ||
- ih->protocol == IPPROTO_DCCP) {
- const struct tcpudphdr *pptr;
- struct tcpudphdr _ports;
-
- pptr = skb_header_pointer(skb, ih->ihl*4,
- sizeof(_ports), &_ports);
- if (pptr == NULL) {
- printk(" INCOMPLETE TCP/UDP header");
- goto out;
- }
- printk(" SPT=%u DPT=%u", ntohs(pptr->src),
- ntohs(pptr->dst));
+ print_ports(skb, ih->protocol, ih->ihl*4);
+ goto out;
+ }
+
+#if defined(CONFIG_BRIDGE_EBT_IP6) || defined(CONFIG_BRIDGE_EBT_IP6_MODULE)
+ if ((bitmask & EBT_LOG_IP6) && eth_hdr(skb)->h_proto ==
+ htons(ETH_P_IPV6)) {
+ const struct ipv6hdr *ih;
+ struct ipv6hdr _iph;
+ uint8_t nexthdr;
+ int offset_ph;
+
+ ih = skb_header_pointer(skb, 0, sizeof(_iph), &_iph);
+ if (ih == NULL) {
+ printk(" INCOMPLETE IPv6 header");
+ goto out;
}
+ printk(" IPv6 SRC=%x:%x:%x:%x:%x:%x:%x:%x "
+ "IPv6 DST=%x:%x:%x:%x:%x:%x:%x:%x, IPv6 "
+ "priority=0x%01X, Next Header=%d", NIP6(ih->saddr),
+ NIP6(ih->daddr), ih->priority, ih->nexthdr);
+ nexthdr = ih->nexthdr;
+ offset_ph = ipv6_skip_exthdr(skb, sizeof(_iph), &nexthdr);
+ if (offset_ph == -1)
+ goto out;
+ print_ports(skb, nexthdr, offset_ph);
goto out;
}
+#endif
if ((bitmask & EBT_LOG_ARP) &&
((eth_hdr(skb)->h_proto == htons(ETH_P_ARP)) ||
diff --git a/net/can/af_can.c b/net/can/af_can.c
index 7e8ca2836452..484bbf6dd032 100644
--- a/net/can/af_can.c
+++ b/net/can/af_can.c
@@ -205,12 +205,19 @@ static int can_create(struct net *net, struct socket *sock, int protocol)
* -ENOBUFS on full driver queue (see net_xmit_errno())
* -ENOMEM when local loopback failed at calling skb_clone()
* -EPERM when trying to send on a non-CAN interface
+ * -EINVAL when the skb->data does not contain a valid CAN frame
*/
int can_send(struct sk_buff *skb, int loop)
{
struct sk_buff *newskb = NULL;
+ struct can_frame *cf = (struct can_frame *)skb->data;
int err;
+ if (skb->len != sizeof(struct can_frame) || cf->can_dlc > 8) {
+ kfree_skb(skb);
+ return -EINVAL;
+ }
+
if (skb->dev->type != ARPHRD_CAN) {
kfree_skb(skb);
return -EPERM;
@@ -605,6 +612,7 @@ static int can_rcv(struct sk_buff *skb, struct net_device *dev,
struct packet_type *pt, struct net_device *orig_dev)
{
struct dev_rcv_lists *d;
+ struct can_frame *cf = (struct can_frame *)skb->data;
int matches;
if (dev->type != ARPHRD_CAN || dev_net(dev) != &init_net) {
@@ -612,6 +620,8 @@ static int can_rcv(struct sk_buff *skb, struct net_device *dev,
return 0;
}
+ BUG_ON(skb->len != sizeof(struct can_frame) || cf->can_dlc > 8);
+
/* update statistics */
can_stats.rx_frames++;
can_stats.rx_frames_delta++;
diff --git a/net/can/bcm.c b/net/can/bcm.c
index d9a3a9d13bed..72c2ce904f83 100644
--- a/net/can/bcm.c
+++ b/net/can/bcm.c
@@ -298,7 +298,7 @@ static void bcm_send_to_user(struct bcm_op *op, struct bcm_msg_head *head,
if (head->nframes) {
/* can_frames starting here */
- firstframe = (struct can_frame *) skb_tail_pointer(skb);
+ firstframe = (struct can_frame *)skb_tail_pointer(skb);
memcpy(skb_put(skb, datalen), frames, datalen);
@@ -826,6 +826,10 @@ static int bcm_tx_setup(struct bcm_msg_head *msg_head, struct msghdr *msg,
for (i = 0; i < msg_head->nframes; i++) {
err = memcpy_fromiovec((u8 *)&op->frames[i],
msg->msg_iov, CFSIZ);
+
+ if (op->frames[i].can_dlc > 8)
+ err = -EINVAL;
+
if (err < 0)
return err;
@@ -858,6 +862,10 @@ static int bcm_tx_setup(struct bcm_msg_head *msg_head, struct msghdr *msg,
for (i = 0; i < msg_head->nframes; i++) {
err = memcpy_fromiovec((u8 *)&op->frames[i],
msg->msg_iov, CFSIZ);
+
+ if (op->frames[i].can_dlc > 8)
+ err = -EINVAL;
+
if (err < 0) {
if (op->frames != &op->sframe)
kfree(op->frames);
@@ -1164,9 +1172,12 @@ static int bcm_tx_send(struct msghdr *msg, int ifindex, struct sock *sk)
skb->dev = dev;
skb->sk = sk;
- can_send(skb, 1); /* send with loopback */
+ err = can_send(skb, 1); /* send with loopback */
dev_put(dev);
+ if (err)
+ return err;
+
return CFSIZ + MHSIZ;
}
@@ -1185,6 +1196,10 @@ static int bcm_sendmsg(struct kiocb *iocb, struct socket *sock,
if (!bo->bound)
return -ENOTCONN;
+ /* check for valid message length from userspace */
+ if (size < MHSIZ || (size - MHSIZ) % CFSIZ)
+ return -EINVAL;
+
/* check for alternative ifindex for this bcm_op */
if (!ifindex && msg->msg_name) {
@@ -1259,8 +1274,8 @@ static int bcm_sendmsg(struct kiocb *iocb, struct socket *sock,
break;
case TX_SEND:
- /* we need at least one can_frame */
- if (msg_head.nframes < 1)
+ /* we need exactly one can_frame behind the msg head */
+ if ((msg_head.nframes != 1) || (size != CFSIZ + MHSIZ))
ret = -EINVAL;
else
ret = bcm_tx_send(msg, ifindex, sk);
diff --git a/net/can/raw.c b/net/can/raw.c
index 69877b8e7e9c..3e46ee36a1aa 100644
--- a/net/can/raw.c
+++ b/net/can/raw.c
@@ -632,6 +632,9 @@ static int raw_sendmsg(struct kiocb *iocb, struct socket *sock,
} else
ifindex = ro->ifindex;
+ if (size != sizeof(struct can_frame))
+ return -EINVAL;
+
dev = dev_get_by_index(&init_net, ifindex);
if (!dev)
return -ENXIO;
diff --git a/net/core/dev.c b/net/core/dev.c
index fca23a3bf12c..065b9817e209 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -90,6 +90,7 @@
#include <linux/if_ether.h>
#include <linux/netdevice.h>
#include <linux/etherdevice.h>
+#include <linux/ethtool.h>
#include <linux/notifier.h>
#include <linux/skbuff.h>
#include <net/net_namespace.h>
@@ -120,6 +121,9 @@
#include <linux/ctype.h>
#include <linux/if_arp.h>
#include <linux/if_vlan.h>
+#include <linux/ip.h>
+#include <linux/ipv6.h>
+#include <linux/in.h>
#include "net-sysfs.h"
@@ -257,7 +261,7 @@ DEFINE_PER_CPU(struct softnet_data, softnet_data);
#ifdef CONFIG_DEBUG_LOCK_ALLOC
/*
- * register_netdevice() inits dev->_xmit_lock and sets lockdep class
+ * register_netdevice() inits txq->_xmit_lock and sets lockdep class
* according to dev->type
*/
static const unsigned short netdev_lock_type[] =
@@ -961,6 +965,12 @@ void netdev_state_change(struct net_device *dev)
}
}
+void netdev_bonding_change(struct net_device *dev)
+{
+ call_netdevice_notifiers(NETDEV_BONDING_FAILOVER, dev);
+}
+EXPORT_SYMBOL(netdev_bonding_change);
+
/**
* dev_load - load a network module
* @net: the applicable net namespace
@@ -1117,6 +1127,29 @@ int dev_close(struct net_device *dev)
}
+/**
+ * dev_disable_lro - disable Large Receive Offload on a device
+ * @dev: device
+ *
+ * Disable Large Receive Offload (LRO) on a net device. Must be
+ * called under RTNL. This is needed if received packets may be
+ * forwarded to another interface.
+ */
+void dev_disable_lro(struct net_device *dev)
+{
+ if (dev->ethtool_ops && dev->ethtool_ops->get_flags &&
+ dev->ethtool_ops->set_flags) {
+ u32 flags = dev->ethtool_ops->get_flags(dev);
+ if (flags & ETH_FLAG_LRO) {
+ flags &= ~ETH_FLAG_LRO;
+ dev->ethtool_ops->set_flags(dev, flags);
+ }
+ }
+ WARN_ON(dev->features & NETIF_F_LRO);
+}
+EXPORT_SYMBOL(dev_disable_lro);
+
+
static int dev_boot_phase = 1;
/*
@@ -1290,16 +1323,18 @@ static void dev_queue_xmit_nit(struct sk_buff *skb, struct net_device *dev)
}
-void __netif_schedule(struct net_device *dev)
+void __netif_schedule(struct Qdisc *q)
{
- if (!test_and_set_bit(__LINK_STATE_SCHED, &dev->state)) {
- unsigned long flags;
+ BUG_ON(q == &noop_qdisc);
+
+ if (!test_and_set_bit(__QDISC_STATE_SCHED, &q->state)) {
struct softnet_data *sd;
+ unsigned long flags;
local_irq_save(flags);
sd = &__get_cpu_var(softnet_data);
- dev->next_sched = sd->output_queue;
- sd->output_queue = dev;
+ q->next_sched = sd->output_queue;
+ sd->output_queue = q;
raise_softirq_irqoff(NET_TX_SOFTIRQ);
local_irq_restore(flags);
}
@@ -1566,7 +1601,8 @@ static int dev_gso_segment(struct sk_buff *skb)
return 0;
}
-int dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev)
+int dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev,
+ struct netdev_queue *txq)
{
if (likely(!skb->next)) {
if (!list_empty(&ptype_all))
@@ -1595,9 +1631,7 @@ gso:
skb->next = nskb;
return rc;
}
- if (unlikely((netif_queue_stopped(dev) ||
- netif_subqueue_stopped(dev, skb)) &&
- skb->next))
+ if (unlikely(netif_tx_queue_stopped(txq) && skb->next))
return NETDEV_TX_BUSY;
} while (skb->next);
@@ -1634,9 +1668,71 @@ out_kfree_skb:
* --BLG
*/
+static u16 simple_tx_hash(struct net_device *dev, struct sk_buff *skb)
+{
+ u32 *addr, *ports, hash, ihl;
+ u8 ip_proto;
+ int alen;
+
+ switch (skb->protocol) {
+ case __constant_htons(ETH_P_IP):
+ ip_proto = ip_hdr(skb)->protocol;
+ addr = &ip_hdr(skb)->saddr;
+ ihl = ip_hdr(skb)->ihl;
+ alen = 2;
+ break;
+ case __constant_htons(ETH_P_IPV6):
+ ip_proto = ipv6_hdr(skb)->nexthdr;
+ addr = &ipv6_hdr(skb)->saddr.s6_addr32[0];
+ ihl = (40 >> 2);
+ alen = 8;
+ break;
+ default:
+ return 0;
+ }
+
+ ports = (u32 *) (skb_network_header(skb) + (ihl * 4));
+
+ hash = 0;
+ while (alen--)
+ hash ^= *addr++;
+
+ switch (ip_proto) {
+ case IPPROTO_TCP:
+ case IPPROTO_UDP:
+ case IPPROTO_DCCP:
+ case IPPROTO_ESP:
+ case IPPROTO_AH:
+ case IPPROTO_SCTP:
+ case IPPROTO_UDPLITE:
+ hash ^= *ports;
+ break;
+
+ default:
+ break;
+ }
+
+ return hash % dev->real_num_tx_queues;
+}
+
+static struct netdev_queue *dev_pick_tx(struct net_device *dev,
+ struct sk_buff *skb)
+{
+ u16 queue_index = 0;
+
+ if (dev->select_queue)
+ queue_index = dev->select_queue(dev, skb);
+ else if (dev->real_num_tx_queues > 1)
+ queue_index = simple_tx_hash(dev, skb);
+
+ skb_set_queue_mapping(skb, queue_index);
+ return netdev_get_tx_queue(dev, queue_index);
+}
+
int dev_queue_xmit(struct sk_buff *skb)
{
struct net_device *dev = skb->dev;
+ struct netdev_queue *txq;
struct Qdisc *q;
int rc = -ENOMEM;
@@ -1669,44 +1765,29 @@ int dev_queue_xmit(struct sk_buff *skb)
}
gso:
- spin_lock_prefetch(&dev->queue_lock);
-
/* Disable soft irqs for various locks below. Also
* stops preemption for RCU.
*/
rcu_read_lock_bh();
- /* Updates of qdisc are serialized by queue_lock.
- * The struct Qdisc which is pointed to by qdisc is now a
- * rcu structure - it may be accessed without acquiring
- * a lock (but the structure may be stale.) The freeing of the
- * qdisc will be deferred until it's known that there are no
- * more references to it.
- *
- * If the qdisc has an enqueue function, we still need to
- * hold the queue_lock before calling it, since queue_lock
- * also serializes access to the device queue.
- */
+ txq = dev_pick_tx(dev, skb);
+ q = rcu_dereference(txq->qdisc);
- q = rcu_dereference(dev->qdisc);
#ifdef CONFIG_NET_CLS_ACT
skb->tc_verd = SET_TC_AT(skb->tc_verd,AT_EGRESS);
#endif
if (q->enqueue) {
- /* Grab device queue */
- spin_lock(&dev->queue_lock);
- q = dev->qdisc;
- if (q->enqueue) {
- /* reset queue_mapping to zero */
- skb_set_queue_mapping(skb, 0);
- rc = q->enqueue(skb, q);
- qdisc_run(dev);
- spin_unlock(&dev->queue_lock);
-
- rc = rc == NET_XMIT_BYPASS ? NET_XMIT_SUCCESS : rc;
- goto out;
- }
- spin_unlock(&dev->queue_lock);
+ spinlock_t *root_lock = qdisc_root_lock(q);
+
+ spin_lock(root_lock);
+
+ rc = q->enqueue(skb, q);
+ qdisc_run(q);
+
+ spin_unlock(root_lock);
+
+ rc = rc == NET_XMIT_BYPASS ? NET_XMIT_SUCCESS : rc;
+ goto out;
}
/* The device has no queue. Common case for software devices:
@@ -1724,19 +1805,18 @@ gso:
if (dev->flags & IFF_UP) {
int cpu = smp_processor_id(); /* ok because BHs are off */
- if (dev->xmit_lock_owner != cpu) {
+ if (txq->xmit_lock_owner != cpu) {
- HARD_TX_LOCK(dev, cpu);
+ HARD_TX_LOCK(dev, txq, cpu);
- if (!netif_queue_stopped(dev) &&
- !netif_subqueue_stopped(dev, skb)) {
+ if (!netif_tx_queue_stopped(txq)) {
rc = 0;
- if (!dev_hard_start_xmit(skb, dev)) {
- HARD_TX_UNLOCK(dev);
+ if (!dev_hard_start_xmit(skb, dev, txq)) {
+ HARD_TX_UNLOCK(dev, txq);
goto out;
}
}
- HARD_TX_UNLOCK(dev);
+ HARD_TX_UNLOCK(dev, txq);
if (net_ratelimit())
printk(KERN_CRIT "Virtual device %s asks to "
"queue packet!\n", dev->name);
@@ -1880,7 +1960,7 @@ static void net_tx_action(struct softirq_action *h)
}
if (sd->output_queue) {
- struct net_device *head;
+ struct Qdisc *head;
local_irq_disable();
head = sd->output_queue;
@@ -1888,17 +1968,20 @@ static void net_tx_action(struct softirq_action *h)
local_irq_enable();
while (head) {
- struct net_device *dev = head;
+ struct Qdisc *q = head;
+ spinlock_t *root_lock;
+
head = head->next_sched;
smp_mb__before_clear_bit();
- clear_bit(__LINK_STATE_SCHED, &dev->state);
+ clear_bit(__QDISC_STATE_SCHED, &q->state);
- if (spin_trylock(&dev->queue_lock)) {
- qdisc_run(dev);
- spin_unlock(&dev->queue_lock);
+ root_lock = qdisc_root_lock(q);
+ if (spin_trylock(root_lock)) {
+ qdisc_run(q);
+ spin_unlock(root_lock);
} else {
- netif_schedule(dev);
+ __netif_schedule(q);
}
}
}
@@ -1979,10 +2062,11 @@ static inline struct sk_buff *handle_macvlan(struct sk_buff *skb,
*/
static int ing_filter(struct sk_buff *skb)
{
- struct Qdisc *q;
struct net_device *dev = skb->dev;
- int result = TC_ACT_OK;
u32 ttl = G_TC_RTTL(skb->tc_verd);
+ struct netdev_queue *rxq;
+ int result = TC_ACT_OK;
+ struct Qdisc *q;
if (MAX_RED_LOOP < ttl++) {
printk(KERN_WARNING
@@ -1994,10 +2078,14 @@ static int ing_filter(struct sk_buff *skb)
skb->tc_verd = SET_TC_RTTL(skb->tc_verd, ttl);
skb->tc_verd = SET_TC_AT(skb->tc_verd, AT_INGRESS);
- spin_lock(&dev->ingress_lock);
- if ((q = dev->qdisc_ingress) != NULL)
+ rxq = &dev->rx_queue;
+
+ q = rxq->qdisc;
+ if (q) {
+ spin_lock(qdisc_lock(q));
result = q->enqueue(skb, q);
- spin_unlock(&dev->ingress_lock);
+ spin_unlock(qdisc_lock(q));
+ }
return result;
}
@@ -2006,7 +2094,7 @@ static inline struct sk_buff *handle_ing(struct sk_buff *skb,
struct packet_type **pt_prev,
int *ret, struct net_device *orig_dev)
{
- if (!skb->dev->qdisc_ingress)
+ if (!skb->dev->rx_queue.qdisc)
goto out;
if (*pt_prev) {
@@ -2030,6 +2118,33 @@ out:
}
#endif
+/*
+ * netif_nit_deliver - deliver received packets to network taps
+ * @skb: buffer
+ *
+ * This function is used to deliver incoming packets to network
+ * taps. It should be used when the normal netif_receive_skb path
+ * is bypassed, for example because of VLAN acceleration.
+ */
+void netif_nit_deliver(struct sk_buff *skb)
+{
+ struct packet_type *ptype;
+
+ if (list_empty(&ptype_all))
+ return;
+
+ skb_reset_network_header(skb);
+ skb_reset_transport_header(skb);
+ skb->mac_len = skb->network_header - skb->mac_header;
+
+ rcu_read_lock();
+ list_for_each_entry_rcu(ptype, &ptype_all, list) {
+ if (!ptype->dev || ptype->dev == skb->dev)
+ deliver_skb(skb, ptype, skb->dev);
+ }
+ rcu_read_unlock();
+}
+
/**
* netif_receive_skb - process receive buffer from network
* @skb: buffer to process
@@ -2769,16 +2884,29 @@ int netdev_set_master(struct net_device *slave, struct net_device *master)
return 0;
}
-static void __dev_set_promiscuity(struct net_device *dev, int inc)
+static int __dev_set_promiscuity(struct net_device *dev, int inc)
{
unsigned short old_flags = dev->flags;
ASSERT_RTNL();
- if ((dev->promiscuity += inc) == 0)
- dev->flags &= ~IFF_PROMISC;
- else
- dev->flags |= IFF_PROMISC;
+ dev->flags |= IFF_PROMISC;
+ dev->promiscuity += inc;
+ if (dev->promiscuity == 0) {
+ /*
+ * Avoid overflow.
+ * If inc causes overflow, untouch promisc and return error.
+ */
+ if (inc < 0)
+ dev->flags &= ~IFF_PROMISC;
+ else {
+ dev->promiscuity -= inc;
+ printk(KERN_WARNING "%s: promiscuity touches roof, "
+ "set promiscuity failed, promiscuity feature "
+ "of device might be broken.\n", dev->name);
+ return -EOVERFLOW;
+ }
+ }
if (dev->flags != old_flags) {
printk(KERN_INFO "device %s %s promiscuous mode\n",
dev->name, (dev->flags & IFF_PROMISC) ? "entered" :
@@ -2796,6 +2924,7 @@ static void __dev_set_promiscuity(struct net_device *dev, int inc)
if (dev->change_rx_flags)
dev->change_rx_flags(dev, IFF_PROMISC);
}
+ return 0;
}
/**
@@ -2807,14 +2936,19 @@ static void __dev_set_promiscuity(struct net_device *dev, int inc)
* remains above zero the interface remains promiscuous. Once it hits zero
* the device reverts back to normal filtering operation. A negative inc
* value is used to drop promiscuity on the device.
+ * Return 0 if successful or a negative errno code on error.
*/
-void dev_set_promiscuity(struct net_device *dev, int inc)
+int dev_set_promiscuity(struct net_device *dev, int inc)
{
unsigned short old_flags = dev->flags;
+ int err;
- __dev_set_promiscuity(dev, inc);
+ err = __dev_set_promiscuity(dev, inc);
+ if (err < 0)
+ return err;
if (dev->flags != old_flags)
dev_set_rx_mode(dev);
+ return err;
}
/**
@@ -2827,22 +2961,38 @@ void dev_set_promiscuity(struct net_device *dev, int inc)
* to all interfaces. Once it hits zero the device reverts back to normal
* filtering operation. A negative @inc value is used to drop the counter
* when releasing a resource needing all multicasts.
+ * Return 0 if successful or a negative errno code on error.
*/
-void dev_set_allmulti(struct net_device *dev, int inc)
+int dev_set_allmulti(struct net_device *dev, int inc)
{
unsigned short old_flags = dev->flags;
ASSERT_RTNL();
dev->flags |= IFF_ALLMULTI;
- if ((dev->allmulti += inc) == 0)
- dev->flags &= ~IFF_ALLMULTI;
+ dev->allmulti += inc;
+ if (dev->allmulti == 0) {
+ /*
+ * Avoid overflow.
+ * If inc causes overflow, untouch allmulti and return error.
+ */
+ if (inc < 0)
+ dev->flags &= ~IFF_ALLMULTI;
+ else {
+ dev->allmulti -= inc;
+ printk(KERN_WARNING "%s: allmulti touches roof, "
+ "set allmulti failed, allmulti feature of "
+ "device might be broken.\n", dev->name);
+ return -EOVERFLOW;
+ }
+ }
if (dev->flags ^ old_flags) {
if (dev->change_rx_flags)
dev->change_rx_flags(dev, IFF_ALLMULTI);
dev_set_rx_mode(dev);
}
+ return 0;
}
/*
@@ -2881,9 +3031,9 @@ void __dev_set_rx_mode(struct net_device *dev)
void dev_set_rx_mode(struct net_device *dev)
{
- netif_tx_lock_bh(dev);
+ netif_addr_lock_bh(dev);
__dev_set_rx_mode(dev);
- netif_tx_unlock_bh(dev);
+ netif_addr_unlock_bh(dev);
}
int __dev_addr_delete(struct dev_addr_list **list, int *count,
@@ -2961,11 +3111,11 @@ int dev_unicast_delete(struct net_device *dev, void *addr, int alen)
ASSERT_RTNL();
- netif_tx_lock_bh(dev);
+ netif_addr_lock_bh(dev);
err = __dev_addr_delete(&dev->uc_list, &dev->uc_count, addr, alen, 0);
if (!err)
__dev_set_rx_mode(dev);
- netif_tx_unlock_bh(dev);
+ netif_addr_unlock_bh(dev);
return err;
}
EXPORT_SYMBOL(dev_unicast_delete);
@@ -2987,11 +3137,11 @@ int dev_unicast_add(struct net_device *dev, void *addr, int alen)
ASSERT_RTNL();
- netif_tx_lock_bh(dev);
+ netif_addr_lock_bh(dev);
err = __dev_addr_add(&dev->uc_list, &dev->uc_count, addr, alen, 0);
if (!err)
__dev_set_rx_mode(dev);
- netif_tx_unlock_bh(dev);
+ netif_addr_unlock_bh(dev);
return err;
}
EXPORT_SYMBOL(dev_unicast_add);
@@ -3058,12 +3208,12 @@ int dev_unicast_sync(struct net_device *to, struct net_device *from)
{
int err = 0;
- netif_tx_lock_bh(to);
+ netif_addr_lock_bh(to);
err = __dev_addr_sync(&to->uc_list, &to->uc_count,
&from->uc_list, &from->uc_count);
if (!err)
__dev_set_rx_mode(to);
- netif_tx_unlock_bh(to);
+ netif_addr_unlock_bh(to);
return err;
}
EXPORT_SYMBOL(dev_unicast_sync);
@@ -3079,15 +3229,15 @@ EXPORT_SYMBOL(dev_unicast_sync);
*/
void dev_unicast_unsync(struct net_device *to, struct net_device *from)
{
- netif_tx_lock_bh(from);
- netif_tx_lock_bh(to);
+ netif_addr_lock_bh(from);
+ netif_addr_lock(to);
__dev_addr_unsync(&to->uc_list, &to->uc_count,
&from->uc_list, &from->uc_count);
__dev_set_rx_mode(to);
- netif_tx_unlock_bh(to);
- netif_tx_unlock_bh(from);
+ netif_addr_unlock(to);
+ netif_addr_unlock_bh(from);
}
EXPORT_SYMBOL(dev_unicast_unsync);
@@ -3107,7 +3257,7 @@ static void __dev_addr_discard(struct dev_addr_list **list)
static void dev_addr_discard(struct net_device *dev)
{
- netif_tx_lock_bh(dev);
+ netif_addr_lock_bh(dev);
__dev_addr_discard(&dev->uc_list);
dev->uc_count = 0;
@@ -3115,7 +3265,7 @@ static void dev_addr_discard(struct net_device *dev)
__dev_addr_discard(&dev->mc_list);
dev->mc_count = 0;
- netif_tx_unlock_bh(dev);
+ netif_addr_unlock_bh(dev);
}
unsigned dev_get_flags(const struct net_device *dev)
@@ -3688,6 +3838,21 @@ static void rollback_registered(struct net_device *dev)
dev_put(dev);
}
+static void __netdev_init_queue_locks_one(struct net_device *dev,
+ struct netdev_queue *dev_queue,
+ void *_unused)
+{
+ spin_lock_init(&dev_queue->_xmit_lock);
+ netdev_set_lockdep_class(&dev_queue->_xmit_lock, dev->type);
+ dev_queue->xmit_lock_owner = -1;
+}
+
+static void netdev_init_queue_locks(struct net_device *dev)
+{
+ netdev_for_each_tx_queue(dev, __netdev_init_queue_locks_one, NULL);
+ __netdev_init_queue_locks_one(dev, &dev->rx_queue, NULL);
+}
+
/**
* register_netdevice - register a network device
* @dev: device to register
@@ -3722,11 +3887,8 @@ int register_netdevice(struct net_device *dev)
BUG_ON(!dev_net(dev));
net = dev_net(dev);
- spin_lock_init(&dev->queue_lock);
- spin_lock_init(&dev->_xmit_lock);
- netdev_set_lockdep_class(&dev->_xmit_lock, dev->type);
- dev->xmit_lock_owner = -1;
- spin_lock_init(&dev->ingress_lock);
+ spin_lock_init(&dev->addr_list_lock);
+ netdev_init_queue_locks(dev);
dev->iflink = -1;
@@ -4007,6 +4169,19 @@ static struct net_device_stats *internal_stats(struct net_device *dev)
return &dev->stats;
}
+static void netdev_init_one_queue(struct net_device *dev,
+ struct netdev_queue *queue,
+ void *_unused)
+{
+ queue->dev = dev;
+}
+
+static void netdev_init_queues(struct net_device *dev)
+{
+ netdev_init_one_queue(dev, &dev->rx_queue, NULL);
+ netdev_for_each_tx_queue(dev, netdev_init_one_queue, NULL);
+}
+
/**
* alloc_netdev_mq - allocate network device
* @sizeof_priv: size of private data to allocate space for
@@ -4021,14 +4196,14 @@ static struct net_device_stats *internal_stats(struct net_device *dev)
struct net_device *alloc_netdev_mq(int sizeof_priv, const char *name,
void (*setup)(struct net_device *), unsigned int queue_count)
{
- void *p;
+ struct netdev_queue *tx;
struct net_device *dev;
int alloc_size;
+ void *p;
BUG_ON(strlen(name) >= sizeof(dev->name));
- alloc_size = sizeof(struct net_device) +
- sizeof(struct net_device_subqueue) * (queue_count - 1);
+ alloc_size = sizeof(struct net_device);
if (sizeof_priv) {
/* ensure 32-byte alignment of private area */
alloc_size = (alloc_size + NETDEV_ALIGN_CONST) & ~NETDEV_ALIGN_CONST;
@@ -4043,22 +4218,33 @@ struct net_device *alloc_netdev_mq(int sizeof_priv, const char *name,
return NULL;
}
+ tx = kzalloc(sizeof(struct netdev_queue) * queue_count, GFP_KERNEL);
+ if (!tx) {
+ printk(KERN_ERR "alloc_netdev: Unable to allocate "
+ "tx qdiscs.\n");
+ kfree(p);
+ return NULL;
+ }
+
dev = (struct net_device *)
(((long)p + NETDEV_ALIGN_CONST) & ~NETDEV_ALIGN_CONST);
dev->padded = (char *)dev - (char *)p;
dev_net_set(dev, &init_net);
+ dev->_tx = tx;
+ dev->num_tx_queues = queue_count;
+ dev->real_num_tx_queues = queue_count;
+
if (sizeof_priv) {
dev->priv = ((char *)dev +
- ((sizeof(struct net_device) +
- (sizeof(struct net_device_subqueue) *
- (queue_count - 1)) + NETDEV_ALIGN_CONST)
+ ((sizeof(struct net_device) + NETDEV_ALIGN_CONST)
& ~NETDEV_ALIGN_CONST));
}
- dev->egress_subqueue_count = queue_count;
dev->gso_max_size = GSO_MAX_SIZE;
+ netdev_init_queues(dev);
+
dev->get_stats = internal_stats;
netpoll_netdev_init(dev);
setup(dev);
@@ -4079,6 +4265,8 @@ void free_netdev(struct net_device *dev)
{
release_net(dev_net(dev));
+ kfree(dev->_tx);
+
/* Compatibility with error handling in drivers */
if (dev->reg_state == NETREG_UNINITIALIZED) {
kfree((char *)dev - dev->padded);
@@ -4260,7 +4448,7 @@ static int dev_cpu_callback(struct notifier_block *nfb,
void *ocpu)
{
struct sk_buff **list_skb;
- struct net_device **list_net;
+ struct Qdisc **list_net;
struct sk_buff *skb;
unsigned int cpu, oldcpu = (unsigned long)ocpu;
struct softnet_data *sd, *oldsd;
@@ -4585,8 +4773,8 @@ static int __init net_dev_init(void)
dev_boot_phase = 0;
- open_softirq(NET_TX_SOFTIRQ, net_tx_action, NULL);
- open_softirq(NET_RX_SOFTIRQ, net_rx_action, NULL);
+ open_softirq(NET_TX_SOFTIRQ, net_tx_action);
+ open_softirq(NET_RX_SOFTIRQ, net_rx_action);
hotcpu_notifier(dev_cpu_callback, 0);
dst_init();
diff --git a/net/core/dev_mcast.c b/net/core/dev_mcast.c
index f8a3455f4493..5402b3b38e0d 100644
--- a/net/core/dev_mcast.c
+++ b/net/core/dev_mcast.c
@@ -72,7 +72,7 @@ int dev_mc_delete(struct net_device *dev, void *addr, int alen, int glbl)
{
int err;
- netif_tx_lock_bh(dev);
+ netif_addr_lock_bh(dev);
err = __dev_addr_delete(&dev->mc_list, &dev->mc_count,
addr, alen, glbl);
if (!err) {
@@ -83,7 +83,7 @@ int dev_mc_delete(struct net_device *dev, void *addr, int alen, int glbl)
__dev_set_rx_mode(dev);
}
- netif_tx_unlock_bh(dev);
+ netif_addr_unlock_bh(dev);
return err;
}
@@ -95,11 +95,11 @@ int dev_mc_add(struct net_device *dev, void *addr, int alen, int glbl)
{
int err;
- netif_tx_lock_bh(dev);
+ netif_addr_lock_bh(dev);
err = __dev_addr_add(&dev->mc_list, &dev->mc_count, addr, alen, glbl);
if (!err)
__dev_set_rx_mode(dev);
- netif_tx_unlock_bh(dev);
+ netif_addr_unlock_bh(dev);
return err;
}
@@ -119,12 +119,12 @@ int dev_mc_sync(struct net_device *to, struct net_device *from)
{
int err = 0;
- netif_tx_lock_bh(to);
+ netif_addr_lock_bh(to);
err = __dev_addr_sync(&to->mc_list, &to->mc_count,
&from->mc_list, &from->mc_count);
if (!err)
__dev_set_rx_mode(to);
- netif_tx_unlock_bh(to);
+ netif_addr_unlock_bh(to);
return err;
}
@@ -143,15 +143,15 @@ EXPORT_SYMBOL(dev_mc_sync);
*/
void dev_mc_unsync(struct net_device *to, struct net_device *from)
{
- netif_tx_lock_bh(from);
- netif_tx_lock_bh(to);
+ netif_addr_lock_bh(from);
+ netif_addr_lock(to);
__dev_addr_unsync(&to->mc_list, &to->mc_count,
&from->mc_list, &from->mc_count);
__dev_set_rx_mode(to);
- netif_tx_unlock_bh(to);
- netif_tx_unlock_bh(from);
+ netif_addr_unlock(to);
+ netif_addr_unlock_bh(from);
}
EXPORT_SYMBOL(dev_mc_unsync);
@@ -164,7 +164,7 @@ static int dev_mc_seq_show(struct seq_file *seq, void *v)
if (v == SEQ_START_TOKEN)
return 0;
- netif_tx_lock_bh(dev);
+ netif_addr_lock_bh(dev);
for (m = dev->mc_list; m; m = m->next) {
int i;
@@ -176,7 +176,7 @@ static int dev_mc_seq_show(struct seq_file *seq, void *v)
seq_putc(seq, '\n');
}
- netif_tx_unlock_bh(dev);
+ netif_addr_unlock_bh(dev);
return 0;
}
diff --git a/net/core/ethtool.c b/net/core/ethtool.c
index 0133b5ebd545..14ada537f895 100644
--- a/net/core/ethtool.c
+++ b/net/core/ethtool.c
@@ -209,6 +209,36 @@ static int ethtool_get_drvinfo(struct net_device *dev, void __user *useraddr)
return 0;
}
+static int ethtool_set_rxhash(struct net_device *dev, void __user *useraddr)
+{
+ struct ethtool_rxnfc cmd;
+
+ if (!dev->ethtool_ops->set_rxhash)
+ return -EOPNOTSUPP;
+
+ if (copy_from_user(&cmd, useraddr, sizeof(cmd)))
+ return -EFAULT;
+
+ return dev->ethtool_ops->set_rxhash(dev, &cmd);
+}
+
+static int ethtool_get_rxhash(struct net_device *dev, void __user *useraddr)
+{
+ struct ethtool_rxnfc info;
+
+ if (!dev->ethtool_ops->get_rxhash)
+ return -EOPNOTSUPP;
+
+ if (copy_from_user(&info, useraddr, sizeof(info)))
+ return -EFAULT;
+
+ dev->ethtool_ops->get_rxhash(dev, &info);
+
+ if (copy_to_user(useraddr, &info, sizeof(info)))
+ return -EFAULT;
+ return 0;
+}
+
static int ethtool_get_regs(struct net_device *dev, char __user *useraddr)
{
struct ethtool_regs regs;
@@ -826,6 +856,7 @@ int dev_ethtool(struct net *net, struct ifreq *ifr)
case ETHTOOL_GGSO:
case ETHTOOL_GFLAGS:
case ETHTOOL_GPFLAGS:
+ case ETHTOOL_GRXFH:
break;
default:
if (!capable(CAP_NET_ADMIN))
@@ -977,6 +1008,12 @@ int dev_ethtool(struct net *net, struct ifreq *ifr)
rc = ethtool_set_value(dev, useraddr,
dev->ethtool_ops->set_priv_flags);
break;
+ case ETHTOOL_GRXFH:
+ rc = ethtool_get_rxhash(dev, useraddr);
+ break;
+ case ETHTOOL_SRXFH:
+ rc = ethtool_set_rxhash(dev, useraddr);
+ break;
default:
rc = -EOPNOTSUPP;
}
diff --git a/net/core/fib_rules.c b/net/core/fib_rules.c
index 277a2302eb3a..79de3b14a8d1 100644
--- a/net/core/fib_rules.c
+++ b/net/core/fib_rules.c
@@ -69,7 +69,7 @@ static void rules_ops_put(struct fib_rules_ops *ops)
static void flush_route_cache(struct fib_rules_ops *ops)
{
if (ops->flush_cache)
- ops->flush_cache();
+ ops->flush_cache(ops);
}
int fib_rules_register(struct fib_rules_ops *ops)
diff --git a/net/core/flow.c b/net/core/flow.c
index 19991175fdeb..5cf81052d044 100644
--- a/net/core/flow.c
+++ b/net/core/flow.c
@@ -298,7 +298,7 @@ void flow_cache_flush(void)
init_completion(&info.completion);
local_bh_disable();
- smp_call_function(flow_cache_flush_per_cpu, &info, 1, 0);
+ smp_call_function(flow_cache_flush_per_cpu, &info, 0);
flow_cache_flush_tasklet((unsigned long)&info);
local_bh_enable();
diff --git a/net/core/link_watch.c b/net/core/link_watch.c
index a5e372b9ec4d..bf8f7af699d7 100644
--- a/net/core/link_watch.c
+++ b/net/core/link_watch.c
@@ -77,10 +77,10 @@ static void rfc2863_policy(struct net_device *dev)
}
-static int linkwatch_urgent_event(struct net_device *dev)
+static bool linkwatch_urgent_event(struct net_device *dev)
{
return netif_running(dev) && netif_carrier_ok(dev) &&
- dev->qdisc != dev->qdisc_sleeping;
+ qdisc_tx_changing(dev);
}
@@ -180,10 +180,9 @@ static void __linkwatch_run_queue(int urgent_only)
rfc2863_policy(dev);
if (dev->flags & IFF_UP) {
- if (netif_carrier_ok(dev)) {
- WARN_ON(dev->qdisc_sleeping == &noop_qdisc);
+ if (netif_carrier_ok(dev))
dev_activate(dev);
- } else
+ else
dev_deactivate(dev);
netdev_state_change(dev);
@@ -214,7 +213,7 @@ static void linkwatch_event(struct work_struct *dummy)
void linkwatch_fire_event(struct net_device *dev)
{
- int urgent = linkwatch_urgent_event(dev);
+ bool urgent = linkwatch_urgent_event(dev);
if (!test_and_set_bit(__LINK_STATE_LINKWATCH_PENDING, &dev->state)) {
dev_hold(dev);
diff --git a/net/core/neighbour.c b/net/core/neighbour.c
index 65f01f71b3f3..f62c8af85d38 100644
--- a/net/core/neighbour.c
+++ b/net/core/neighbour.c
@@ -930,6 +930,7 @@ int __neigh_event_send(struct neighbour *neigh, struct sk_buff *skb)
buff = neigh->arp_queue.next;
__skb_unlink(buff, &neigh->arp_queue);
kfree_skb(buff);
+ NEIGH_CACHE_STAT_INC(neigh->tbl, unres_discards);
}
__skb_queue_tail(&neigh->arp_queue, skb);
}
@@ -2462,12 +2463,12 @@ static int neigh_stat_seq_show(struct seq_file *seq, void *v)
struct neigh_statistics *st = v;
if (v == SEQ_START_TOKEN) {
- seq_printf(seq, "entries allocs destroys hash_grows lookups hits res_failed rcv_probes_mcast rcv_probes_ucast periodic_gc_runs forced_gc_runs\n");
+ seq_printf(seq, "entries allocs destroys hash_grows lookups hits res_failed rcv_probes_mcast rcv_probes_ucast periodic_gc_runs forced_gc_runs unresolved_discards\n");
return 0;
}
seq_printf(seq, "%08x %08lx %08lx %08lx %08lx %08lx %08lx "
- "%08lx %08lx %08lx %08lx\n",
+ "%08lx %08lx %08lx %08lx %08lx\n",
atomic_read(&tbl->entries),
st->allocs,
@@ -2483,7 +2484,8 @@ static int neigh_stat_seq_show(struct seq_file *seq, void *v)
st->rcv_probes_ucast,
st->periodic_gc_runs,
- st->forced_gc_runs
+ st->forced_gc_runs,
+ st->unres_discards
);
return 0;
diff --git a/net/core/net-sysfs.c b/net/core/net-sysfs.c
index 90e2177af081..c1f4e0d428c0 100644
--- a/net/core/net-sysfs.c
+++ b/net/core/net-sysfs.c
@@ -242,11 +242,11 @@ static ssize_t netstat_show(const struct device *d,
offset % sizeof(unsigned long) != 0);
read_lock(&dev_base_lock);
- if (dev_isalive(dev) && dev->get_stats &&
- (stats = (*dev->get_stats)(dev)))
+ if (dev_isalive(dev)) {
+ stats = dev->get_stats(dev);
ret = sprintf(buf, fmt_ulong,
*(unsigned long *)(((u8 *) stats) + offset));
-
+ }
read_unlock(&dev_base_lock);
return ret;
}
@@ -318,7 +318,7 @@ static struct attribute_group netstat_group = {
.attrs = netstat_attrs,
};
-#ifdef CONFIG_WIRELESS_EXT
+#ifdef CONFIG_WIRELESS_EXT_SYSFS
/* helper function that does all the locking etc for wireless stats */
static ssize_t wireless_show(struct device *d, char *buf,
ssize_t (*format)(const struct iw_statistics *,
@@ -457,10 +457,9 @@ int netdev_register_kobject(struct net_device *net)
strlcpy(dev->bus_id, net->name, BUS_ID_SIZE);
#ifdef CONFIG_SYSFS
- if (net->get_stats)
- *groups++ = &netstat_group;
+ *groups++ = &netstat_group;
-#ifdef CONFIG_WIRELESS_EXT
+#ifdef CONFIG_WIRELESS_EXT_SYSFS
if (net->wireless_handlers && net->wireless_handlers->get_wireless_stats)
*groups++ = &wireless_group;
#endif
@@ -469,6 +468,19 @@ int netdev_register_kobject(struct net_device *net)
return device_add(dev);
}
+int netdev_class_create_file(struct class_attribute *class_attr)
+{
+ return class_create_file(&net_class, class_attr);
+}
+
+void netdev_class_remove_file(struct class_attribute *class_attr)
+{
+ class_remove_file(&net_class, class_attr);
+}
+
+EXPORT_SYMBOL(netdev_class_create_file);
+EXPORT_SYMBOL(netdev_class_remove_file);
+
void netdev_initialize_kobject(struct net_device *net)
{
struct device *device = &(net->dev);
diff --git a/net/core/netpoll.c b/net/core/netpoll.c
index 8fb134da0346..c12720895ecf 100644
--- a/net/core/netpoll.c
+++ b/net/core/netpoll.c
@@ -58,25 +58,27 @@ static void queue_process(struct work_struct *work)
while ((skb = skb_dequeue(&npinfo->txq))) {
struct net_device *dev = skb->dev;
+ struct netdev_queue *txq;
if (!netif_device_present(dev) || !netif_running(dev)) {
__kfree_skb(skb);
continue;
}
+ txq = netdev_get_tx_queue(dev, skb_get_queue_mapping(skb));
+
local_irq_save(flags);
- netif_tx_lock(dev);
- if ((netif_queue_stopped(dev) ||
- netif_subqueue_stopped(dev, skb)) ||
- dev->hard_start_xmit(skb, dev) != NETDEV_TX_OK) {
+ __netif_tx_lock(txq, smp_processor_id());
+ if (netif_tx_queue_stopped(txq) ||
+ dev->hard_start_xmit(skb, dev) != NETDEV_TX_OK) {
skb_queue_head(&npinfo->txq, skb);
- netif_tx_unlock(dev);
+ __netif_tx_unlock(txq);
local_irq_restore(flags);
schedule_delayed_work(&npinfo->tx_work, HZ/10);
return;
}
- netif_tx_unlock(dev);
+ __netif_tx_unlock(txq);
local_irq_restore(flags);
}
}
@@ -278,17 +280,19 @@ static void netpoll_send_skb(struct netpoll *np, struct sk_buff *skb)
/* don't get messages out of order, and no recursion */
if (skb_queue_len(&npinfo->txq) == 0 && !netpoll_owner_active(dev)) {
+ struct netdev_queue *txq;
unsigned long flags;
+ txq = netdev_get_tx_queue(dev, skb_get_queue_mapping(skb));
+
local_irq_save(flags);
/* try until next clock tick */
for (tries = jiffies_to_usecs(1)/USEC_PER_POLL;
tries > 0; --tries) {
- if (netif_tx_trylock(dev)) {
- if (!netif_queue_stopped(dev) &&
- !netif_subqueue_stopped(dev, skb))
+ if (__netif_tx_trylock(txq)) {
+ if (!netif_tx_queue_stopped(txq))
status = dev->hard_start_xmit(skb, dev);
- netif_tx_unlock(dev);
+ __netif_tx_unlock(txq);
if (status == NETDEV_TX_OK)
break;
diff --git a/net/core/pktgen.c b/net/core/pktgen.c
index fdf537707e51..906802db4ed4 100644
--- a/net/core/pktgen.c
+++ b/net/core/pktgen.c
@@ -2123,6 +2123,24 @@ static void get_ipsec_sa(struct pktgen_dev *pkt_dev, int flow)
}
}
#endif
+static void set_cur_queue_map(struct pktgen_dev *pkt_dev)
+{
+ if (pkt_dev->queue_map_min < pkt_dev->queue_map_max) {
+ __u16 t;
+ if (pkt_dev->flags & F_QUEUE_MAP_RND) {
+ t = random32() %
+ (pkt_dev->queue_map_max -
+ pkt_dev->queue_map_min + 1)
+ + pkt_dev->queue_map_min;
+ } else {
+ t = pkt_dev->cur_queue_map + 1;
+ if (t > pkt_dev->queue_map_max)
+ t = pkt_dev->queue_map_min;
+ }
+ pkt_dev->cur_queue_map = t;
+ }
+}
+
/* Increment/randomize headers according to flags and current values
* for IP src/dest, UDP src/dst port, MAC-Addr src/dst
*/
@@ -2325,19 +2343,7 @@ static void mod_cur_headers(struct pktgen_dev *pkt_dev)
pkt_dev->cur_pkt_size = t;
}
- if (pkt_dev->queue_map_min < pkt_dev->queue_map_max) {
- __u16 t;
- if (pkt_dev->flags & F_QUEUE_MAP_RND) {
- t = random32() %
- (pkt_dev->queue_map_max - pkt_dev->queue_map_min + 1)
- + pkt_dev->queue_map_min;
- } else {
- t = pkt_dev->cur_queue_map + 1;
- if (t > pkt_dev->queue_map_max)
- t = pkt_dev->queue_map_min;
- }
- pkt_dev->cur_queue_map = t;
- }
+ set_cur_queue_map(pkt_dev);
pkt_dev->flows[flow].count++;
}
@@ -2458,7 +2464,7 @@ static struct sk_buff *fill_packet_ipv4(struct net_device *odev,
__be16 *vlan_encapsulated_proto = NULL; /* packet type ID field (or len) for VLAN tag */
__be16 *svlan_tci = NULL; /* Encapsulates priority and SVLAN ID */
__be16 *svlan_encapsulated_proto = NULL; /* packet type ID field (or len) for SVLAN tag */
-
+ u16 queue_map;
if (pkt_dev->nr_labels)
protocol = htons(ETH_P_MPLS_UC);
@@ -2469,6 +2475,7 @@ static struct sk_buff *fill_packet_ipv4(struct net_device *odev,
/* Update any of the values, used when we're incrementing various
* fields.
*/
+ queue_map = pkt_dev->cur_queue_map;
mod_cur_headers(pkt_dev);
datalen = (odev->hard_header_len + 16) & ~0xf;
@@ -2507,7 +2514,7 @@ static struct sk_buff *fill_packet_ipv4(struct net_device *odev,
skb->network_header = skb->tail;
skb->transport_header = skb->network_header + sizeof(struct iphdr);
skb_put(skb, sizeof(struct iphdr) + sizeof(struct udphdr));
- skb_set_queue_mapping(skb, pkt_dev->cur_queue_map);
+ skb_set_queue_mapping(skb, queue_map);
iph = ip_hdr(skb);
udph = udp_hdr(skb);
@@ -2797,6 +2804,7 @@ static struct sk_buff *fill_packet_ipv6(struct net_device *odev,
__be16 *vlan_encapsulated_proto = NULL; /* packet type ID field (or len) for VLAN tag */
__be16 *svlan_tci = NULL; /* Encapsulates priority and SVLAN ID */
__be16 *svlan_encapsulated_proto = NULL; /* packet type ID field (or len) for SVLAN tag */
+ u16 queue_map;
if (pkt_dev->nr_labels)
protocol = htons(ETH_P_MPLS_UC);
@@ -2807,6 +2815,7 @@ static struct sk_buff *fill_packet_ipv6(struct net_device *odev,
/* Update any of the values, used when we're incrementing various
* fields.
*/
+ queue_map = pkt_dev->cur_queue_map;
mod_cur_headers(pkt_dev);
skb = alloc_skb(pkt_dev->cur_pkt_size + 64 + 16 +
@@ -2844,7 +2853,7 @@ static struct sk_buff *fill_packet_ipv6(struct net_device *odev,
skb->network_header = skb->tail;
skb->transport_header = skb->network_header + sizeof(struct ipv6hdr);
skb_put(skb, sizeof(struct ipv6hdr) + sizeof(struct udphdr));
- skb_set_queue_mapping(skb, pkt_dev->cur_queue_map);
+ skb_set_queue_mapping(skb, queue_map);
iph = ipv6_hdr(skb);
udph = udp_hdr(skb);
@@ -3263,7 +3272,9 @@ static void pktgen_rem_thread(struct pktgen_thread *t)
static __inline__ void pktgen_xmit(struct pktgen_dev *pkt_dev)
{
struct net_device *odev = NULL;
+ struct netdev_queue *txq;
__u64 idle_start = 0;
+ u16 queue_map;
int ret;
odev = pkt_dev->odev;
@@ -3285,9 +3296,15 @@ static __inline__ void pktgen_xmit(struct pktgen_dev *pkt_dev)
}
}
- if ((netif_queue_stopped(odev) ||
- (pkt_dev->skb &&
- netif_subqueue_stopped(odev, pkt_dev->skb))) ||
+ if (!pkt_dev->skb) {
+ set_cur_queue_map(pkt_dev);
+ queue_map = pkt_dev->cur_queue_map;
+ } else {
+ queue_map = skb_get_queue_mapping(pkt_dev->skb);
+ }
+
+ txq = netdev_get_tx_queue(odev, queue_map);
+ if (netif_tx_queue_stopped(txq) ||
need_resched()) {
idle_start = getCurUs();
@@ -3303,8 +3320,7 @@ static __inline__ void pktgen_xmit(struct pktgen_dev *pkt_dev)
pkt_dev->idle_acc += getCurUs() - idle_start;
- if (netif_queue_stopped(odev) ||
- netif_subqueue_stopped(odev, pkt_dev->skb)) {
+ if (netif_tx_queue_stopped(txq)) {
pkt_dev->next_tx_us = getCurUs(); /* TODO */
pkt_dev->next_tx_ns = 0;
goto out; /* Try the next interface */
@@ -3331,9 +3347,12 @@ static __inline__ void pktgen_xmit(struct pktgen_dev *pkt_dev)
}
}
- netif_tx_lock_bh(odev);
- if (!netif_queue_stopped(odev) &&
- !netif_subqueue_stopped(odev, pkt_dev->skb)) {
+ /* fill_packet() might have changed the queue */
+ queue_map = skb_get_queue_mapping(pkt_dev->skb);
+ txq = netdev_get_tx_queue(odev, queue_map);
+
+ __netif_tx_lock_bh(txq);
+ if (!netif_tx_queue_stopped(txq)) {
atomic_inc(&(pkt_dev->skb->users));
retry_now:
@@ -3377,7 +3396,7 @@ static __inline__ void pktgen_xmit(struct pktgen_dev *pkt_dev)
pkt_dev->next_tx_ns = 0;
}
- netif_tx_unlock_bh(odev);
+ __netif_tx_unlock_bh(txq);
/* If pkt_dev->count is zero, then run forever */
if ((pkt_dev->count != 0) && (pkt_dev->sofar >= pkt_dev->count)) {
diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c
index a9a77216310e..71edb8b36341 100644
--- a/net/core/rtnetlink.c
+++ b/net/core/rtnetlink.c
@@ -605,8 +605,11 @@ static int rtnl_fill_ifinfo(struct sk_buff *skb, struct net_device *dev,
int type, u32 pid, u32 seq, u32 change,
unsigned int flags)
{
+ struct netdev_queue *txq;
struct ifinfomsg *ifm;
struct nlmsghdr *nlh;
+ struct net_device_stats *stats;
+ struct nlattr *attr;
nlh = nlmsg_put(skb, pid, seq, type, sizeof(*ifm), flags);
if (nlh == NULL)
@@ -633,8 +636,9 @@ static int rtnl_fill_ifinfo(struct sk_buff *skb, struct net_device *dev,
if (dev->master)
NLA_PUT_U32(skb, IFLA_MASTER, dev->master->ifindex);
- if (dev->qdisc_sleeping)
- NLA_PUT_STRING(skb, IFLA_QDISC, dev->qdisc_sleeping->ops->id);
+ txq = netdev_get_tx_queue(dev, 0);
+ if (txq->qdisc_sleeping)
+ NLA_PUT_STRING(skb, IFLA_QDISC, txq->qdisc_sleeping->ops->id);
if (1) {
struct rtnl_link_ifmap map = {
@@ -653,19 +657,13 @@ static int rtnl_fill_ifinfo(struct sk_buff *skb, struct net_device *dev,
NLA_PUT(skb, IFLA_BROADCAST, dev->addr_len, dev->broadcast);
}
- if (dev->get_stats) {
- struct net_device_stats *stats = dev->get_stats(dev);
- if (stats) {
- struct nlattr *attr;
+ attr = nla_reserve(skb, IFLA_STATS,
+ sizeof(struct rtnl_link_stats));
+ if (attr == NULL)
+ goto nla_put_failure;
- attr = nla_reserve(skb, IFLA_STATS,
- sizeof(struct rtnl_link_stats));
- if (attr == NULL)
- goto nla_put_failure;
-
- copy_rtnl_link_stats(nla_data(attr), stats);
- }
- }
+ stats = dev->get_stats(dev);
+ copy_rtnl_link_stats(nla_data(attr), stats);
if (dev->rtnl_link_ops) {
if (rtnl_link_fill(skb, dev) < 0)
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index 366621610e76..e4115672b6cf 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -4,8 +4,6 @@
* Authors: Alan Cox <iiitac@pyr.swan.ac.uk>
* Florian La Roche <rzsfl@rz.uni-sb.de>
*
- * Version: $Id: skbuff.c,v 1.90 2001/11/07 05:56:19 davem Exp $
- *
* Fixes:
* Alan Cox : Fixed the worst of the load
* balancer bugs.
@@ -461,6 +459,8 @@ static void __copy_skb_header(struct sk_buff *new, const struct sk_buff *old)
new->tc_verd = old->tc_verd;
#endif
#endif
+ new->vlan_tci = old->vlan_tci;
+
skb_copy_secmark(new, old);
}
@@ -1282,114 +1282,83 @@ static inline int spd_fill_page(struct splice_pipe_desc *spd, struct page *page,
return 0;
}
-/*
- * Map linear and fragment data from the skb to spd. Returns number of
- * pages mapped.
- */
-static int __skb_splice_bits(struct sk_buff *skb, unsigned int *offset,
- unsigned int *total_len,
- struct splice_pipe_desc *spd)
-{
- unsigned int nr_pages = spd->nr_pages;
- unsigned int poff, plen, len, toff, tlen;
- int headlen, seg, error = 0;
-
- toff = *offset;
- tlen = *total_len;
- if (!tlen) {
- error = 1;
- goto err;
+static inline void __segment_seek(struct page **page, unsigned int *poff,
+ unsigned int *plen, unsigned int off)
+{
+ *poff += off;
+ *page += *poff / PAGE_SIZE;
+ *poff = *poff % PAGE_SIZE;
+ *plen -= off;
+}
+
+static inline int __splice_segment(struct page *page, unsigned int poff,
+ unsigned int plen, unsigned int *off,
+ unsigned int *len, struct sk_buff *skb,
+ struct splice_pipe_desc *spd)
+{
+ if (!*len)
+ return 1;
+
+ /* skip this segment if already processed */
+ if (*off >= plen) {
+ *off -= plen;
+ return 0;
}
- /*
- * if the offset is greater than the linear part, go directly to
- * the fragments.
- */
- headlen = skb_headlen(skb);
- if (toff >= headlen) {
- toff -= headlen;
- goto map_frag;
+ /* ignore any bits we already processed */
+ if (*off) {
+ __segment_seek(&page, &poff, &plen, *off);
+ *off = 0;
}
- /*
- * first map the linear region into the pages/partial map, skipping
- * any potential initial offset.
- */
- len = 0;
- while (len < headlen) {
- void *p = skb->data + len;
-
- poff = (unsigned long) p & (PAGE_SIZE - 1);
- plen = min_t(unsigned int, headlen - len, PAGE_SIZE - poff);
- len += plen;
-
- if (toff) {
- if (plen <= toff) {
- toff -= plen;
- continue;
- }
- plen -= toff;
- poff += toff;
- toff = 0;
- }
+ do {
+ unsigned int flen = min(*len, plen);
- plen = min(plen, tlen);
- if (!plen)
- break;
+ /* the linear region may spread across several pages */
+ flen = min_t(unsigned int, flen, PAGE_SIZE - poff);
- /*
- * just jump directly to update and return, no point
- * in going over fragments when the output is full.
- */
- error = spd_fill_page(spd, virt_to_page(p), plen, poff, skb);
- if (error)
- goto done;
+ if (spd_fill_page(spd, page, flen, poff, skb))
+ return 1;
- tlen -= plen;
- }
+ __segment_seek(&page, &poff, &plen, flen);
+ *len -= flen;
+
+ } while (*len && plen);
+
+ return 0;
+}
+
+/*
+ * Map linear and fragment data from the skb to spd. It reports failure if the
+ * pipe is full or if we already spliced the requested length.
+ */
+static int __skb_splice_bits(struct sk_buff *skb, unsigned int *offset,
+ unsigned int *len,
+ struct splice_pipe_desc *spd)
+{
+ int seg;
+
+ /*
+ * map the linear part
+ */
+ if (__splice_segment(virt_to_page(skb->data),
+ (unsigned long) skb->data & (PAGE_SIZE - 1),
+ skb_headlen(skb),
+ offset, len, skb, spd))
+ return 1;
/*
* then map the fragments
*/
-map_frag:
for (seg = 0; seg < skb_shinfo(skb)->nr_frags; seg++) {
const skb_frag_t *f = &skb_shinfo(skb)->frags[seg];
- plen = f->size;
- poff = f->page_offset;
-
- if (toff) {
- if (plen <= toff) {
- toff -= plen;
- continue;
- }
- plen -= toff;
- poff += toff;
- toff = 0;
- }
-
- plen = min(plen, tlen);
- if (!plen)
- break;
-
- error = spd_fill_page(spd, f->page, plen, poff, skb);
- if (error)
- break;
-
- tlen -= plen;
+ if (__splice_segment(f->page, f->page_offset, f->size,
+ offset, len, skb, spd))
+ return 1;
}
-done:
- if (spd->nr_pages - nr_pages) {
- *offset = 0;
- *total_len = tlen;
- return 0;
- }
-err:
- /* update the offset to reflect the linear part skip, if any */
- if (!error)
- *offset = toff;
- return error;
+ return 0;
}
/*
@@ -2288,6 +2257,7 @@ struct sk_buff *skb_segment(struct sk_buff *skb, int features)
skb_copy_queue_mapping(nskb, skb);
nskb->priority = skb->priority;
nskb->protocol = skb->protocol;
+ nskb->vlan_tci = skb->vlan_tci;
nskb->dst = dst_clone(skb->dst);
memcpy(nskb->cb, skb->cb, sizeof(skb->cb));
nskb->pkt_type = skb->pkt_type;
@@ -2592,6 +2562,13 @@ bool skb_partial_csum_set(struct sk_buff *skb, u16 start, u16 off)
return true;
}
+void __skb_warn_lro_forwarding(const struct sk_buff *skb)
+{
+ if (net_ratelimit())
+ pr_warning("%s: received packets cannot be forwarded"
+ " while LRO is enabled\n", skb->dev->name);
+}
+
EXPORT_SYMBOL(___pskb_trim);
EXPORT_SYMBOL(__kfree_skb);
EXPORT_SYMBOL(kfree_skb);
@@ -2625,6 +2602,7 @@ EXPORT_SYMBOL(skb_seq_read);
EXPORT_SYMBOL(skb_abort_seq_read);
EXPORT_SYMBOL(skb_find_text);
EXPORT_SYMBOL(skb_append_datato_frags);
+EXPORT_SYMBOL(__skb_warn_lro_forwarding);
EXPORT_SYMBOL_GPL(skb_to_sgvec);
EXPORT_SYMBOL_GPL(skb_cow_data);
diff --git a/net/core/sock.c b/net/core/sock.c
index 88094cb09c06..10a64d57078c 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -7,8 +7,6 @@
* handler for protocols to use and generic option handler.
*
*
- * Version: $Id: sock.c,v 1.117 2002/02/01 22:01:03 davem Exp $
- *
* Authors: Ross Biro
* Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
* Florian La Roche, <flla@stud.uni-sb.de>
@@ -1068,7 +1066,7 @@ struct sock *sk_clone(const struct sock *sk, const gfp_t priority)
* to be taken into account in all callers. -acme
*/
sk_refcnt_debug_inc(newsk);
- newsk->sk_socket = NULL;
+ sk_set_socket(newsk, NULL);
newsk->sk_sleep = NULL;
if (newsk->sk_prot->sockets_allocated)
@@ -1444,7 +1442,7 @@ int __sk_mem_schedule(struct sock *sk, int size, int kind)
/* Under pressure. */
if (allocated > prot->sysctl_mem[1])
if (prot->enter_memory_pressure)
- prot->enter_memory_pressure();
+ prot->enter_memory_pressure(sk);
/* Over hard limit. */
if (allocated > prot->sysctl_mem[2])
@@ -1704,7 +1702,7 @@ void sock_init_data(struct socket *sock, struct sock *sk)
sk->sk_rcvbuf = sysctl_rmem_default;
sk->sk_sndbuf = sysctl_wmem_default;
sk->sk_state = TCP_CLOSE;
- sk->sk_socket = sock;
+ sk_set_socket(sk, sock);
sock_set_flag(sk, SOCK_ZAPPED);
diff --git a/net/core/sysctl_net_core.c b/net/core/sysctl_net_core.c
index 5fc801057244..a570e2af22cb 100644
--- a/net/core/sysctl_net_core.c
+++ b/net/core/sysctl_net_core.c
@@ -125,14 +125,6 @@ static struct ctl_table net_core_table[] = {
#endif /* CONFIG_XFRM */
#endif /* CONFIG_NET */
{
- .ctl_name = NET_CORE_SOMAXCONN,
- .procname = "somaxconn",
- .data = &init_net.core.sysctl_somaxconn,
- .maxlen = sizeof(int),
- .mode = 0644,
- .proc_handler = &proc_dointvec
- },
- {
.ctl_name = NET_CORE_BUDGET,
.procname = "netdev_budget",
.data = &netdev_budget,
@@ -151,6 +143,18 @@ static struct ctl_table net_core_table[] = {
{ .ctl_name = 0 }
};
+static struct ctl_table netns_core_table[] = {
+ {
+ .ctl_name = NET_CORE_SOMAXCONN,
+ .procname = "somaxconn",
+ .data = &init_net.core.sysctl_somaxconn,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = &proc_dointvec
+ },
+ { .ctl_name = 0 }
+};
+
static __net_initdata struct ctl_path net_core_path[] = {
{ .procname = "net", .ctl_name = CTL_NET, },
{ .procname = "core", .ctl_name = NET_CORE, },
@@ -159,23 +163,17 @@ static __net_initdata struct ctl_path net_core_path[] = {
static __net_init int sysctl_core_net_init(struct net *net)
{
- struct ctl_table *tbl, *tmp;
+ struct ctl_table *tbl;
net->core.sysctl_somaxconn = SOMAXCONN;
- tbl = net_core_table;
+ tbl = netns_core_table;
if (net != &init_net) {
- tbl = kmemdup(tbl, sizeof(net_core_table), GFP_KERNEL);
+ tbl = kmemdup(tbl, sizeof(netns_core_table), GFP_KERNEL);
if (tbl == NULL)
goto err_dup;
- for (tmp = tbl; tmp->procname; tmp++) {
- if (tmp->data >= (void *)&init_net &&
- tmp->data < (void *)(&init_net + 1))
- tmp->data += (char *)net - (char *)&init_net;
- else
- tmp->mode &= ~0222;
- }
+ tbl[0].data = &net->core.sysctl_somaxconn;
}
net->core.sysctl_hdr = register_net_sysctl_table(net,
@@ -186,7 +184,7 @@ static __net_init int sysctl_core_net_init(struct net *net)
return 0;
err_reg:
- if (tbl != net_core_table)
+ if (tbl != netns_core_table)
kfree(tbl);
err_dup:
return -ENOMEM;
@@ -198,7 +196,7 @@ static __net_exit void sysctl_core_net_exit(struct net *net)
tbl = net->core.sysctl_hdr->ctl_table_arg;
unregister_net_sysctl_table(net->core.sysctl_hdr);
- BUG_ON(tbl == net_core_table);
+ BUG_ON(tbl == netns_core_table);
kfree(tbl);
}
@@ -209,6 +207,7 @@ static __net_initdata struct pernet_operations sysctl_core_ops = {
static __init int sysctl_core_init(void)
{
+ register_net_sysctl_rotable(net_core_path, net_core_table);
return register_pernet_subsys(&sysctl_core_ops);
}
diff --git a/net/dccp/ccids/ccid3.c b/net/dccp/ccids/ccid3.c
index a1929f33d703..f6756e0c9e69 100644
--- a/net/dccp/ccids/ccid3.c
+++ b/net/dccp/ccids/ccid3.c
@@ -794,7 +794,7 @@ static void ccid3_hc_rx_packet_recv(struct sock *sk, struct sk_buff *skb)
{
struct ccid3_hc_rx_sock *hcrx = ccid3_hc_rx_sk(sk);
enum ccid3_fback_type do_feedback = CCID3_FBACK_NONE;
- const u32 ndp = dccp_sk(sk)->dccps_options_received.dccpor_ndp;
+ const u64 ndp = dccp_sk(sk)->dccps_options_received.dccpor_ndp;
const bool is_data_packet = dccp_data_packet(skb);
if (unlikely(hcrx->ccid3hcrx_state == TFRC_RSTATE_NO_DATA)) {
@@ -825,18 +825,16 @@ static void ccid3_hc_rx_packet_recv(struct sock *sk, struct sk_buff *skb)
}
/*
- * Handle pending losses and otherwise check for new loss
+ * Perform loss detection and handle pending losses
*/
- if (tfrc_rx_hist_loss_pending(&hcrx->ccid3hcrx_hist) &&
- tfrc_rx_handle_loss(&hcrx->ccid3hcrx_hist,
- &hcrx->ccid3hcrx_li_hist,
- skb, ndp, ccid3_first_li, sk) ) {
+ if (tfrc_rx_handle_loss(&hcrx->ccid3hcrx_hist, &hcrx->ccid3hcrx_li_hist,
+ skb, ndp, ccid3_first_li, sk)) {
do_feedback = CCID3_FBACK_PARAM_CHANGE;
goto done_receiving;
}
- if (tfrc_rx_hist_new_loss_indicated(&hcrx->ccid3hcrx_hist, skb, ndp))
- goto update_records;
+ if (tfrc_rx_hist_loss_pending(&hcrx->ccid3hcrx_hist))
+ return; /* done receiving */
/*
* Handle data packets: RTT sampling and monitoring p
diff --git a/net/dccp/ccids/lib/loss_interval.c b/net/dccp/ccids/lib/loss_interval.c
index 849e181e698f..bcd6ac415bb9 100644
--- a/net/dccp/ccids/lib/loss_interval.c
+++ b/net/dccp/ccids/lib/loss_interval.c
@@ -90,14 +90,14 @@ u8 tfrc_lh_update_i_mean(struct tfrc_loss_hist *lh, struct sk_buff *skb)
{
struct tfrc_loss_interval *cur = tfrc_lh_peek(lh);
u32 old_i_mean = lh->i_mean;
- s64 length;
+ s64 len;
if (cur == NULL) /* not initialised */
return 0;
- length = dccp_delta_seqno(cur->li_seqno, DCCP_SKB_CB(skb)->dccpd_seq);
+ len = dccp_delta_seqno(cur->li_seqno, DCCP_SKB_CB(skb)->dccpd_seq) + 1;
- if (length - cur->li_length <= 0) /* duplicate or reordered */
+ if (len - (s64)cur->li_length <= 0) /* duplicate or reordered */
return 0;
if (SUB16(dccp_hdr(skb)->dccph_ccval, cur->li_ccval) > 4)
@@ -114,7 +114,7 @@ u8 tfrc_lh_update_i_mean(struct tfrc_loss_hist *lh, struct sk_buff *skb)
if (tfrc_lh_length(lh) == 1) /* due to RFC 3448, 6.3.1 */
return 0;
- cur->li_length = length;
+ cur->li_length = len;
tfrc_lh_calc_i_mean(lh);
return (lh->i_mean < old_i_mean);
@@ -159,7 +159,7 @@ int tfrc_lh_interval_add(struct tfrc_loss_hist *lh, struct tfrc_rx_hist *rh,
else {
cur->li_length = dccp_delta_seqno(cur->li_seqno, new->li_seqno);
new->li_length = dccp_delta_seqno(new->li_seqno,
- tfrc_rx_hist_last_rcv(rh)->tfrchrx_seqno);
+ tfrc_rx_hist_last_rcv(rh)->tfrchrx_seqno) + 1;
if (lh->counter > (2*LIH_SIZE))
lh->counter -= LIH_SIZE;
diff --git a/net/dccp/ccids/lib/packet_history.c b/net/dccp/ccids/lib/packet_history.c
index 20af1a693427..6cc108afdc3b 100644
--- a/net/dccp/ccids/lib/packet_history.c
+++ b/net/dccp/ccids/lib/packet_history.c
@@ -153,7 +153,7 @@ void tfrc_rx_packet_history_exit(void)
static inline void tfrc_rx_hist_entry_from_skb(struct tfrc_rx_hist_entry *entry,
const struct sk_buff *skb,
- const u32 ndp)
+ const u64 ndp)
{
const struct dccp_hdr *dh = dccp_hdr(skb);
@@ -166,7 +166,7 @@ static inline void tfrc_rx_hist_entry_from_skb(struct tfrc_rx_hist_entry *entry,
void tfrc_rx_hist_add_packet(struct tfrc_rx_hist *h,
const struct sk_buff *skb,
- const u32 ndp)
+ const u64 ndp)
{
struct tfrc_rx_hist_entry *entry = tfrc_rx_hist_last_rcv(h);
@@ -206,31 +206,39 @@ static void tfrc_rx_hist_swap(struct tfrc_rx_hist *h, const u8 a, const u8 b)
*
* In the descriptions, `Si' refers to the sequence number of entry number i,
* whose NDP count is `Ni' (lower case is used for variables).
- * Note: All __after_loss functions expect that a test against duplicates has
- * been performed already: the seqno of the skb must not be less than the
- * seqno of loss_prev; and it must not equal that of any valid hist_entry.
+ * Note: All __xxx_loss functions expect that a test against duplicates has been
+ * performed already: the seqno of the skb must not be less than the seqno
+ * of loss_prev; and it must not equal that of any valid history entry.
*/
+static void __do_track_loss(struct tfrc_rx_hist *h, struct sk_buff *skb, u64 n1)
+{
+ u64 s0 = tfrc_rx_hist_loss_prev(h)->tfrchrx_seqno,
+ s1 = DCCP_SKB_CB(skb)->dccpd_seq;
+
+ if (!dccp_loss_free(s0, s1, n1)) { /* gap between S0 and S1 */
+ h->loss_count = 1;
+ tfrc_rx_hist_entry_from_skb(tfrc_rx_hist_entry(h, 1), skb, n1);
+ }
+}
+
static void __one_after_loss(struct tfrc_rx_hist *h, struct sk_buff *skb, u32 n2)
{
u64 s0 = tfrc_rx_hist_loss_prev(h)->tfrchrx_seqno,
s1 = tfrc_rx_hist_entry(h, 1)->tfrchrx_seqno,
s2 = DCCP_SKB_CB(skb)->dccpd_seq;
- int n1 = tfrc_rx_hist_entry(h, 1)->tfrchrx_ndp,
- d12 = dccp_delta_seqno(s1, s2), d2;
- if (d12 > 0) { /* S1 < S2 */
+ if (likely(dccp_delta_seqno(s1, s2) > 0)) { /* S1 < S2 */
h->loss_count = 2;
tfrc_rx_hist_entry_from_skb(tfrc_rx_hist_entry(h, 2), skb, n2);
return;
}
/* S0 < S2 < S1 */
- d2 = dccp_delta_seqno(s0, s2);
- if (d2 == 1 || n2 >= d2) { /* S2 is direct successor of S0 */
- int d21 = -d12;
+ if (dccp_loss_free(s0, s2, n2)) {
+ u64 n1 = tfrc_rx_hist_entry(h, 1)->tfrchrx_ndp;
- if (d21 == 1 || n1 >= d21) {
+ if (dccp_loss_free(s2, s1, n1)) {
/* hole is filled: S0, S2, and S1 are consecutive */
h->loss_count = 0;
h->loss_start = tfrc_rx_hist_index(h, 1);
@@ -238,9 +246,9 @@ static void __one_after_loss(struct tfrc_rx_hist *h, struct sk_buff *skb, u32 n2
/* gap between S2 and S1: just update loss_prev */
tfrc_rx_hist_entry_from_skb(tfrc_rx_hist_loss_prev(h), skb, n2);
- } else { /* hole between S0 and S2 */
+ } else { /* gap between S0 and S2 */
/*
- * Reorder history to insert S2 between S0 and s1
+ * Reorder history to insert S2 between S0 and S1
*/
tfrc_rx_hist_swap(h, 0, 3);
h->loss_start = tfrc_rx_hist_index(h, 3);
@@ -256,22 +264,18 @@ static int __two_after_loss(struct tfrc_rx_hist *h, struct sk_buff *skb, u32 n3)
s1 = tfrc_rx_hist_entry(h, 1)->tfrchrx_seqno,
s2 = tfrc_rx_hist_entry(h, 2)->tfrchrx_seqno,
s3 = DCCP_SKB_CB(skb)->dccpd_seq;
- int n1 = tfrc_rx_hist_entry(h, 1)->tfrchrx_ndp,
- d23 = dccp_delta_seqno(s2, s3), d13, d3, d31;
- if (d23 > 0) { /* S2 < S3 */
+ if (likely(dccp_delta_seqno(s2, s3) > 0)) { /* S2 < S3 */
h->loss_count = 3;
tfrc_rx_hist_entry_from_skb(tfrc_rx_hist_entry(h, 3), skb, n3);
return 1;
}
/* S3 < S2 */
- d13 = dccp_delta_seqno(s1, s3);
- if (d13 > 0) {
+ if (dccp_delta_seqno(s1, s3) > 0) { /* S1 < S3 < S2 */
/*
- * The sequence number order is S1, S3, S2
- * Reorder history to insert entry between S1 and S2
+ * Reorder history to insert S3 between S1 and S2
*/
tfrc_rx_hist_swap(h, 2, 3);
tfrc_rx_hist_entry_from_skb(tfrc_rx_hist_entry(h, 2), skb, n3);
@@ -280,17 +284,15 @@ static int __two_after_loss(struct tfrc_rx_hist *h, struct sk_buff *skb, u32 n3)
}
/* S0 < S3 < S1 */
- d31 = -d13;
- d3 = dccp_delta_seqno(s0, s3);
- if (d3 == 1 || n3 >= d3) { /* S3 is a successor of S0 */
+ if (dccp_loss_free(s0, s3, n3)) {
+ u64 n1 = tfrc_rx_hist_entry(h, 1)->tfrchrx_ndp;
- if (d31 == 1 || n1 >= d31) {
+ if (dccp_loss_free(s3, s1, n1)) {
/* hole between S0 and S1 filled by S3 */
- int d2 = dccp_delta_seqno(s1, s2),
- n2 = tfrc_rx_hist_entry(h, 2)->tfrchrx_ndp;
+ u64 n2 = tfrc_rx_hist_entry(h, 2)->tfrchrx_ndp;
- if (d2 == 1 || n2 >= d2) {
+ if (dccp_loss_free(s1, s2, n2)) {
/* entire hole filled by S0, S3, S1, S2 */
h->loss_start = tfrc_rx_hist_index(h, 2);
h->loss_count = 0;
@@ -307,8 +309,8 @@ static int __two_after_loss(struct tfrc_rx_hist *h, struct sk_buff *skb, u32 n3)
}
/*
- * The remaining case: S3 is not a successor of S0.
- * Sequence order is S0, S3, S1, S2; reorder to insert between S0 and S1
+ * The remaining case: S0 < S3 < S1 < S2; gap between S0 and S3
+ * Reorder history to insert S3 between S0 and S1.
*/
tfrc_rx_hist_swap(h, 0, 3);
h->loss_start = tfrc_rx_hist_index(h, 3);
@@ -318,33 +320,25 @@ static int __two_after_loss(struct tfrc_rx_hist *h, struct sk_buff *skb, u32 n3)
return 1;
}
-/* return the signed modulo-2^48 sequence number distance from entry e1 to e2 */
-static s64 tfrc_rx_hist_delta_seqno(struct tfrc_rx_hist *h, u8 e1, u8 e2)
-{
- DCCP_BUG_ON(e1 > h->loss_count || e2 > h->loss_count);
-
- return dccp_delta_seqno(tfrc_rx_hist_entry(h, e1)->tfrchrx_seqno,
- tfrc_rx_hist_entry(h, e2)->tfrchrx_seqno);
-}
-
/* recycle RX history records to continue loss detection if necessary */
static void __three_after_loss(struct tfrc_rx_hist *h)
{
/*
- * The distance between S0 and S1 is always greater than 1 and the NDP
- * count of S1 is smaller than this distance. Otherwise there would
- * have been no loss. Hence it is only necessary to see whether there
- * are further missing data packets between S1/S2 and S2/S3.
+ * At this stage we know already that there is a gap between S0 and S1
+ * (since S0 was the highest sequence number received before detecting
+ * the loss). To recycle the loss record, it is thus only necessary to
+ * check for other possible gaps between S1/S2 and between S2/S3.
*/
- int d2 = tfrc_rx_hist_delta_seqno(h, 1, 2),
- d3 = tfrc_rx_hist_delta_seqno(h, 2, 3),
- n2 = tfrc_rx_hist_entry(h, 2)->tfrchrx_ndp,
+ u64 s1 = tfrc_rx_hist_entry(h, 1)->tfrchrx_seqno,
+ s2 = tfrc_rx_hist_entry(h, 2)->tfrchrx_seqno,
+ s3 = tfrc_rx_hist_entry(h, 3)->tfrchrx_seqno;
+ u64 n2 = tfrc_rx_hist_entry(h, 2)->tfrchrx_ndp,
n3 = tfrc_rx_hist_entry(h, 3)->tfrchrx_ndp;
- if (d2 == 1 || n2 >= d2) { /* S2 is successor to S1 */
+ if (dccp_loss_free(s1, s2, n2)) {
- if (d3 == 1 || n3 >= d3) {
- /* S3 is successor of S2: entire hole is filled */
+ if (dccp_loss_free(s2, s3, n3)) {
+ /* no gap between S2 and S3: entire hole is filled */
h->loss_start = tfrc_rx_hist_index(h, 3);
h->loss_count = 0;
} else {
@@ -353,7 +347,7 @@ static void __three_after_loss(struct tfrc_rx_hist *h)
h->loss_count = 1;
}
- } else { /* gap between S1 and S2 */
+ } else { /* gap between S1 and S2 */
h->loss_start = tfrc_rx_hist_index(h, 1);
h->loss_count = 2;
}
@@ -370,15 +364,20 @@ static void __three_after_loss(struct tfrc_rx_hist *h)
* Chooses action according to pending loss, updates LI database when a new
* loss was detected, and does required post-processing. Returns 1 when caller
* should send feedback, 0 otherwise.
+ * Since it also takes care of reordering during loss detection and updates the
+ * records accordingly, the caller should not perform any more RX history
+ * operations when loss_count is greater than 0 after calling this function.
*/
int tfrc_rx_handle_loss(struct tfrc_rx_hist *h,
struct tfrc_loss_hist *lh,
- struct sk_buff *skb, u32 ndp,
+ struct sk_buff *skb, const u64 ndp,
u32 (*calc_first_li)(struct sock *), struct sock *sk)
{
int is_new_loss = 0;
- if (h->loss_count == 1) {
+ if (h->loss_count == 0) {
+ __do_track_loss(h, skb, ndp);
+ } else if (h->loss_count == 1) {
__one_after_loss(h, skb, ndp);
} else if (h->loss_count != 2) {
DCCP_BUG("invalid loss_count %d", h->loss_count);
diff --git a/net/dccp/ccids/lib/packet_history.h b/net/dccp/ccids/lib/packet_history.h
index c7eeda49cb20..461cc91cce88 100644
--- a/net/dccp/ccids/lib/packet_history.h
+++ b/net/dccp/ccids/lib/packet_history.h
@@ -64,7 +64,7 @@ struct tfrc_rx_hist_entry {
u64 tfrchrx_seqno:48,
tfrchrx_ccval:4,
tfrchrx_type:4;
- u32 tfrchrx_ndp; /* In fact it is from 8 to 24 bits */
+ u64 tfrchrx_ndp:48;
ktime_t tfrchrx_tstamp;
};
@@ -118,41 +118,21 @@ static inline struct tfrc_rx_hist_entry *
return h->ring[h->loss_start];
}
-/* initialise loss detection and disable RTT sampling */
-static inline void tfrc_rx_hist_loss_indicated(struct tfrc_rx_hist *h)
-{
- h->loss_count = 1;
-}
-
/* indicate whether previously a packet was detected missing */
-static inline int tfrc_rx_hist_loss_pending(const struct tfrc_rx_hist *h)
-{
- return h->loss_count;
-}
-
-/* any data packets missing between last reception and skb ? */
-static inline int tfrc_rx_hist_new_loss_indicated(struct tfrc_rx_hist *h,
- const struct sk_buff *skb,
- u32 ndp)
+static inline bool tfrc_rx_hist_loss_pending(const struct tfrc_rx_hist *h)
{
- int delta = dccp_delta_seqno(tfrc_rx_hist_last_rcv(h)->tfrchrx_seqno,
- DCCP_SKB_CB(skb)->dccpd_seq);
-
- if (delta > 1 && ndp < delta)
- tfrc_rx_hist_loss_indicated(h);
-
- return tfrc_rx_hist_loss_pending(h);
+ return h->loss_count > 0;
}
extern void tfrc_rx_hist_add_packet(struct tfrc_rx_hist *h,
- const struct sk_buff *skb, const u32 ndp);
+ const struct sk_buff *skb, const u64 ndp);
extern int tfrc_rx_hist_duplicate(struct tfrc_rx_hist *h, struct sk_buff *skb);
struct tfrc_loss_hist;
extern int tfrc_rx_handle_loss(struct tfrc_rx_hist *h,
struct tfrc_loss_hist *lh,
- struct sk_buff *skb, u32 ndp,
+ struct sk_buff *skb, const u64 ndp,
u32 (*first_li)(struct sock *sk),
struct sock *sk);
extern u32 tfrc_rx_hist_sample_rtt(struct tfrc_rx_hist *h,
diff --git a/net/dccp/dccp.h b/net/dccp/dccp.h
index f44d492d3b74..32617e0576cb 100644
--- a/net/dccp/dccp.h
+++ b/net/dccp/dccp.h
@@ -153,6 +153,21 @@ static inline u64 max48(const u64 seq1, const u64 seq2)
return after48(seq1, seq2) ? seq1 : seq2;
}
+/**
+ * dccp_loss_free - Evaluates condition for data loss from RFC 4340, 7.7.1
+ * @s1: start sequence number
+ * @s2: end sequence number
+ * @ndp: NDP count on packet with sequence number @s2
+ * Returns true if the sequence range s1...s2 has no data loss.
+ */
+static inline bool dccp_loss_free(const u64 s1, const u64 s2, const u64 ndp)
+{
+ s64 delta = dccp_delta_seqno(s1, s2);
+
+ BUG_TRAP(delta >= 0);
+ return (u64)delta <= ndp + 1;
+}
+
enum {
DCCP_MIB_NUM = 0,
DCCP_MIB_ACTIVEOPENS, /* ActiveOpens */
@@ -262,7 +277,7 @@ extern int dccp_rcv_established(struct sock *sk, struct sk_buff *skb,
const struct dccp_hdr *dh, const unsigned len);
extern int dccp_init_sock(struct sock *sk, const __u8 ctl_sock_initialized);
-extern int dccp_destroy_sock(struct sock *sk);
+extern void dccp_destroy_sock(struct sock *sk);
extern void dccp_close(struct sock *sk, long timeout);
extern struct sk_buff *dccp_make_response(struct sock *sk,
diff --git a/net/dccp/ipv4.c b/net/dccp/ipv4.c
index 37d27bcb361f..2622ace17c46 100644
--- a/net/dccp/ipv4.c
+++ b/net/dccp/ipv4.c
@@ -205,17 +205,18 @@ static void dccp_v4_err(struct sk_buff *skb, u32 info)
struct sock *sk;
__u64 seq;
int err;
+ struct net *net = dev_net(skb->dev);
if (skb->len < (iph->ihl << 2) + 8) {
- ICMP_INC_STATS_BH(ICMP_MIB_INERRORS);
+ ICMP_INC_STATS_BH(net, ICMP_MIB_INERRORS);
return;
}
- sk = inet_lookup(dev_net(skb->dev), &dccp_hashinfo,
+ sk = inet_lookup(net, &dccp_hashinfo,
iph->daddr, dh->dccph_dport,
iph->saddr, dh->dccph_sport, inet_iif(skb));
if (sk == NULL) {
- ICMP_INC_STATS_BH(ICMP_MIB_INERRORS);
+ ICMP_INC_STATS_BH(net, ICMP_MIB_INERRORS);
return;
}
@@ -229,7 +230,7 @@ static void dccp_v4_err(struct sk_buff *skb, u32 info)
* servers this needs to be solved differently.
*/
if (sock_owned_by_user(sk))
- NET_INC_STATS_BH(LINUX_MIB_LOCKDROPPEDICMPS);
+ NET_INC_STATS_BH(net, LINUX_MIB_LOCKDROPPEDICMPS);
if (sk->sk_state == DCCP_CLOSED)
goto out;
@@ -238,7 +239,7 @@ static void dccp_v4_err(struct sk_buff *skb, u32 info)
seq = dccp_hdr_seq(dh);
if ((1 << sk->sk_state) & ~(DCCPF_REQUESTING | DCCPF_LISTEN) &&
!between48(seq, dp->dccps_swl, dp->dccps_swh)) {
- NET_INC_STATS_BH(LINUX_MIB_OUTOFWINDOWICMPS);
+ NET_INC_STATS_BH(net, LINUX_MIB_OUTOFWINDOWICMPS);
goto out;
}
@@ -285,7 +286,7 @@ static void dccp_v4_err(struct sk_buff *skb, u32 info)
BUG_TRAP(!req->sk);
if (seq != dccp_rsk(req)->dreq_iss) {
- NET_INC_STATS_BH(LINUX_MIB_OUTOFWINDOWICMPS);
+ NET_INC_STATS_BH(net, LINUX_MIB_OUTOFWINDOWICMPS);
goto out;
}
/*
@@ -408,9 +409,9 @@ struct sock *dccp_v4_request_recv_sock(struct sock *sk, struct sk_buff *skb,
return newsk;
exit_overflow:
- NET_INC_STATS_BH(LINUX_MIB_LISTENOVERFLOWS);
+ NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_LISTENOVERFLOWS);
exit:
- NET_INC_STATS_BH(LINUX_MIB_LISTENDROPS);
+ NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_LISTENDROPS);
dst_release(dst);
return NULL;
}
@@ -464,7 +465,7 @@ static struct dst_entry* dccp_v4_route_skb(struct net *net, struct sock *sk,
security_skb_classify_flow(skb, &fl);
if (ip_route_output_flow(net, &rt, &fl, sk, 0)) {
- IP_INC_STATS_BH(IPSTATS_MIB_OUTNOROUTES);
+ IP_INC_STATS_BH(net, IPSTATS_MIB_OUTNOROUTES);
return NULL;
}
diff --git a/net/dccp/ipv6.c b/net/dccp/ipv6.c
index f7fe2a572d7b..b74e8b2cbe55 100644
--- a/net/dccp/ipv6.c
+++ b/net/dccp/ipv6.c
@@ -93,8 +93,9 @@ static void dccp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
struct sock *sk;
int err;
__u64 seq;
+ struct net *net = dev_net(skb->dev);
- sk = inet6_lookup(dev_net(skb->dev), &dccp_hashinfo,
+ sk = inet6_lookup(net, &dccp_hashinfo,
&hdr->daddr, dh->dccph_dport,
&hdr->saddr, dh->dccph_sport, inet6_iif(skb));
@@ -110,7 +111,7 @@ static void dccp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
bh_lock_sock(sk);
if (sock_owned_by_user(sk))
- NET_INC_STATS_BH(LINUX_MIB_LOCKDROPPEDICMPS);
+ NET_INC_STATS_BH(net, LINUX_MIB_LOCKDROPPEDICMPS);
if (sk->sk_state == DCCP_CLOSED)
goto out;
@@ -188,7 +189,7 @@ static void dccp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
BUG_TRAP(req->sk == NULL);
if (seq != dccp_rsk(req)->dreq_iss) {
- NET_INC_STATS_BH(LINUX_MIB_OUTOFWINDOWICMPS);
+ NET_INC_STATS_BH(net, LINUX_MIB_OUTOFWINDOWICMPS);
goto out;
}
@@ -629,9 +630,9 @@ static struct sock *dccp_v6_request_recv_sock(struct sock *sk,
return newsk;
out_overflow:
- NET_INC_STATS_BH(LINUX_MIB_LISTENOVERFLOWS);
+ NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_LISTENOVERFLOWS);
out:
- NET_INC_STATS_BH(LINUX_MIB_LISTENDROPS);
+ NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_LISTENDROPS);
if (opt != NULL && opt != np->opt)
sock_kfree_s(sk, opt, opt->tot_len);
dst_release(dst);
@@ -1091,10 +1092,10 @@ static int dccp_v6_init_sock(struct sock *sk)
return err;
}
-static int dccp_v6_destroy_sock(struct sock *sk)
+static void dccp_v6_destroy_sock(struct sock *sk)
{
dccp_destroy_sock(sk);
- return inet6_destroy_sock(sk);
+ inet6_destroy_sock(sk);
}
static struct timewait_sock_ops dccp6_timewait_sock_ops = {
diff --git a/net/dccp/options.c b/net/dccp/options.c
index 43bc24e761d0..dc7c158a2f4b 100644
--- a/net/dccp/options.c
+++ b/net/dccp/options.c
@@ -124,12 +124,12 @@ int dccp_parse_options(struct sock *sk, struct dccp_request_sock *dreq,
mandatory = 1;
break;
case DCCPO_NDP_COUNT:
- if (len > 3)
+ if (len > 6)
goto out_invalid_option;
opt_recv->dccpor_ndp = dccp_decode_value_var(value, len);
- dccp_pr_debug("%s rx opt: NDP count=%d\n", dccp_role(sk),
- opt_recv->dccpor_ndp);
+ dccp_pr_debug("%s opt: NDP count=%llu\n", dccp_role(sk),
+ (unsigned long long)opt_recv->dccpor_ndp);
break;
case DCCPO_CHANGE_L:
/* fall through */
@@ -307,9 +307,11 @@ static void dccp_encode_value_var(const u32 value, unsigned char *to,
*to++ = (value & 0xFF);
}
-static inline int dccp_ndp_len(const int ndp)
+static inline u8 dccp_ndp_len(const u64 ndp)
{
- return likely(ndp <= 0xFF) ? 1 : ndp <= 0xFFFF ? 2 : 3;
+ if (likely(ndp <= 0xFF))
+ return 1;
+ return likely(ndp <= USHORT_MAX) ? 2 : (ndp <= UINT_MAX ? 4 : 6);
}
int dccp_insert_option(struct sock *sk, struct sk_buff *skb,
@@ -336,7 +338,7 @@ EXPORT_SYMBOL_GPL(dccp_insert_option);
static int dccp_insert_option_ndp(struct sock *sk, struct sk_buff *skb)
{
struct dccp_sock *dp = dccp_sk(sk);
- int ndp = dp->dccps_ndp_count;
+ u64 ndp = dp->dccps_ndp_count;
if (dccp_non_data_packet(skb))
++dp->dccps_ndp_count;
diff --git a/net/dccp/proto.c b/net/dccp/proto.c
index 9dfe2470962c..a0b56009611f 100644
--- a/net/dccp/proto.c
+++ b/net/dccp/proto.c
@@ -237,7 +237,7 @@ int dccp_init_sock(struct sock *sk, const __u8 ctl_sock_initialized)
EXPORT_SYMBOL_GPL(dccp_init_sock);
-int dccp_destroy_sock(struct sock *sk)
+void dccp_destroy_sock(struct sock *sk)
{
struct dccp_sock *dp = dccp_sk(sk);
struct dccp_minisock *dmsk = dccp_msk(sk);
@@ -268,8 +268,6 @@ int dccp_destroy_sock(struct sock *sk)
/* clean up feature negotiation state */
dccp_feat_clean(dmsk);
-
- return 0;
}
EXPORT_SYMBOL_GPL(dccp_destroy_sock);
diff --git a/net/dccp/timer.c b/net/dccp/timer.c
index 8703a792b560..3608d5342ca2 100644
--- a/net/dccp/timer.c
+++ b/net/dccp/timer.c
@@ -224,7 +224,7 @@ static void dccp_delack_timer(unsigned long data)
if (sock_owned_by_user(sk)) {
/* Try again later. */
icsk->icsk_ack.blocked = 1;
- NET_INC_STATS_BH(LINUX_MIB_DELAYEDACKLOCKED);
+ NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_DELAYEDACKLOCKED);
sk_reset_timer(sk, &icsk->icsk_delack_timer,
jiffies + TCP_DELACK_MIN);
goto out;
@@ -254,7 +254,7 @@ static void dccp_delack_timer(unsigned long data)
icsk->icsk_ack.ato = TCP_ATO_MIN;
}
dccp_send_ack(sk);
- NET_INC_STATS_BH(LINUX_MIB_DELAYEDACKS);
+ NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_DELAYEDACKS);
}
out:
bh_unlock_sock(sk);
diff --git a/net/decnet/af_decnet.c b/net/decnet/af_decnet.c
index fc2efe899e91..61b7df577ddd 100644
--- a/net/decnet/af_decnet.c
+++ b/net/decnet/af_decnet.c
@@ -451,7 +451,7 @@ static void dn_destruct(struct sock *sk)
static int dn_memory_pressure;
-static void dn_enter_memory_pressure(void)
+static void dn_enter_memory_pressure(struct sock *sk)
{
if (!dn_memory_pressure) {
dn_memory_pressure = 1;
@@ -1719,6 +1719,8 @@ static int dn_recvmsg(struct kiocb *iocb, struct socket *sock,
* See if there is data ready to read, sleep if there isn't
*/
for(;;) {
+ DEFINE_WAIT(wait);
+
if (sk->sk_err)
goto out;
@@ -1748,14 +1750,11 @@ static int dn_recvmsg(struct kiocb *iocb, struct socket *sock,
goto out;
}
- set_bit(SOCK_ASYNC_WAITDATA, &sock->flags);
- SOCK_SLEEP_PRE(sk)
-
- if (!dn_data_ready(sk, queue, flags, target))
- schedule();
-
- SOCK_SLEEP_POST(sk)
- clear_bit(SOCK_ASYNC_WAITDATA, &sock->flags);
+ prepare_to_wait(sk->sk_sleep, &wait, TASK_INTERRUPTIBLE);
+ set_bit(SOCK_ASYNC_WAITDATA, &sk->sk_socket->flags);
+ sk_wait_event(sk, &timeo, dn_data_ready(sk, queue, flags, target));
+ clear_bit(SOCK_ASYNC_WAITDATA, &sk->sk_socket->flags);
+ finish_wait(sk->sk_sleep, &wait);
}
for(skb = queue->next; skb != (struct sk_buff *)queue; skb = nskb) {
@@ -2002,18 +2001,19 @@ static int dn_sendmsg(struct kiocb *iocb, struct socket *sock,
* size.
*/
if (dn_queue_too_long(scp, queue, flags)) {
+ DEFINE_WAIT(wait);
+
if (flags & MSG_DONTWAIT) {
err = -EWOULDBLOCK;
goto out;
}
- SOCK_SLEEP_PRE(sk)
-
- if (dn_queue_too_long(scp, queue, flags))
- schedule();
-
- SOCK_SLEEP_POST(sk)
-
+ prepare_to_wait(sk->sk_sleep, &wait, TASK_INTERRUPTIBLE);
+ set_bit(SOCK_ASYNC_WAITDATA, &sk->sk_socket->flags);
+ sk_wait_event(sk, &timeo,
+ !dn_queue_too_long(scp, queue, flags));
+ clear_bit(SOCK_ASYNC_WAITDATA, &sk->sk_socket->flags);
+ finish_wait(sk->sk_sleep, &wait);
continue;
}
diff --git a/net/decnet/dn_rules.c b/net/decnet/dn_rules.c
index 5b7539b7fe0c..14fbca55e908 100644
--- a/net/decnet/dn_rules.c
+++ b/net/decnet/dn_rules.c
@@ -229,7 +229,7 @@ static u32 dn_fib_rule_default_pref(struct fib_rules_ops *ops)
return 0;
}
-static void dn_fib_rule_flush_cache(void)
+static void dn_fib_rule_flush_cache(struct fib_rules_ops *ops)
{
dn_rt_cache_flush(-1);
}
diff --git a/net/econet/af_econet.c b/net/econet/af_econet.c
index 7c9bb13b1539..d35127bb84e1 100644
--- a/net/econet/af_econet.c
+++ b/net/econet/af_econet.c
@@ -573,9 +573,7 @@ static int econet_release(struct socket *sock)
sk->sk_state_change(sk); /* It is useless. Just for sanity. */
- sock->sk = NULL;
- sk->sk_socket = NULL;
- sock_set_flag(sk, SOCK_DEAD);
+ sock_orphan(sk);
/* Purge queues */
diff --git a/net/ieee80211/ieee80211_rx.c b/net/ieee80211/ieee80211_rx.c
index 200ee1e63728..69dbc342a464 100644
--- a/net/ieee80211/ieee80211_rx.c
+++ b/net/ieee80211/ieee80211_rx.c
@@ -391,7 +391,7 @@ int ieee80211_rx(struct ieee80211_device *ieee, struct sk_buff *skb,
wstats.updated = 0;
if (rx_stats->mask & IEEE80211_STATMASK_RSSI) {
- wstats.level = rx_stats->rssi;
+ wstats.level = rx_stats->signal;
wstats.updated |= IW_QUAL_LEVEL_UPDATED;
} else
wstats.updated |= IW_QUAL_LEVEL_INVALID;
diff --git a/net/ieee80211/ieee80211_tx.c b/net/ieee80211/ieee80211_tx.c
index d8b02603cbe5..d996547f7a62 100644
--- a/net/ieee80211/ieee80211_tx.c
+++ b/net/ieee80211/ieee80211_tx.c
@@ -542,90 +542,4 @@ int ieee80211_xmit(struct sk_buff *skb, struct net_device *dev)
return 1;
}
-/* Incoming 802.11 strucure is converted to a TXB
- * a block of 802.11 fragment packets (stored as skbs) */
-int ieee80211_tx_frame(struct ieee80211_device *ieee,
- struct ieee80211_hdr *frame, int hdr_len, int total_len,
- int encrypt_mpdu)
-{
- struct ieee80211_txb *txb = NULL;
- unsigned long flags;
- struct net_device_stats *stats = &ieee->stats;
- struct sk_buff *skb_frag;
- int priority = -1;
- int fraglen = total_len;
- int headroom = ieee->tx_headroom;
- struct ieee80211_crypt_data *crypt = ieee->crypt[ieee->tx_keyidx];
-
- spin_lock_irqsave(&ieee->lock, flags);
-
- if (encrypt_mpdu && (!ieee->sec.encrypt || !crypt))
- encrypt_mpdu = 0;
-
- /* If there is no driver handler to take the TXB, dont' bother
- * creating it... */
- if (!ieee->hard_start_xmit) {
- printk(KERN_WARNING "%s: No xmit handler.\n", ieee->dev->name);
- goto success;
- }
-
- if (unlikely(total_len < 24)) {
- printk(KERN_WARNING "%s: skb too small (%d).\n",
- ieee->dev->name, total_len);
- goto success;
- }
-
- if (encrypt_mpdu) {
- frame->frame_ctl |= cpu_to_le16(IEEE80211_FCTL_PROTECTED);
- fraglen += crypt->ops->extra_mpdu_prefix_len +
- crypt->ops->extra_mpdu_postfix_len;
- headroom += crypt->ops->extra_mpdu_prefix_len;
- }
-
- /* When we allocate the TXB we allocate enough space for the reserve
- * and full fragment bytes (bytes_per_frag doesn't include prefix,
- * postfix, header, FCS, etc.) */
- txb = ieee80211_alloc_txb(1, fraglen, headroom, GFP_ATOMIC);
- if (unlikely(!txb)) {
- printk(KERN_WARNING "%s: Could not allocate TXB\n",
- ieee->dev->name);
- goto failed;
- }
- txb->encrypted = 0;
- txb->payload_size = fraglen;
-
- skb_frag = txb->fragments[0];
-
- memcpy(skb_put(skb_frag, total_len), frame, total_len);
-
- if (ieee->config &
- (CFG_IEEE80211_COMPUTE_FCS | CFG_IEEE80211_RESERVE_FCS))
- skb_put(skb_frag, 4);
-
- /* To avoid overcomplicating things, we do the corner-case frame
- * encryption in software. The only real situation where encryption is
- * needed here is during software-based shared key authentication. */
- if (encrypt_mpdu)
- ieee80211_encrypt_fragment(ieee, skb_frag, hdr_len);
-
- success:
- spin_unlock_irqrestore(&ieee->lock, flags);
-
- if (txb) {
- if ((*ieee->hard_start_xmit) (txb, ieee->dev, priority) == 0) {
- stats->tx_packets++;
- stats->tx_bytes += txb->payload_size;
- return 0;
- }
- ieee80211_txb_free(txb);
- }
- return 0;
-
- failed:
- spin_unlock_irqrestore(&ieee->lock, flags);
- stats->tx_errors++;
- return 1;
-}
-
-EXPORT_SYMBOL(ieee80211_tx_frame);
EXPORT_SYMBOL(ieee80211_txb_free);
diff --git a/net/ieee80211/ieee80211_wx.c b/net/ieee80211/ieee80211_wx.c
index 623489afa62c..973832dd7faf 100644
--- a/net/ieee80211/ieee80211_wx.c
+++ b/net/ieee80211/ieee80211_wx.c
@@ -43,8 +43,9 @@ static const char *ieee80211_modes[] = {
#define MAX_CUSTOM_LEN 64
static char *ieee80211_translate_scan(struct ieee80211_device *ieee,
- char *start, char *stop,
- struct ieee80211_network *network)
+ char *start, char *stop,
+ struct ieee80211_network *network,
+ struct iw_request_info *info)
{
char custom[MAX_CUSTOM_LEN];
char *p;
@@ -57,7 +58,7 @@ static char *ieee80211_translate_scan(struct ieee80211_device *ieee,
iwe.cmd = SIOCGIWAP;
iwe.u.ap_addr.sa_family = ARPHRD_ETHER;
memcpy(iwe.u.ap_addr.sa_data, network->bssid, ETH_ALEN);
- start = iwe_stream_add_event(start, stop, &iwe, IW_EV_ADDR_LEN);
+ start = iwe_stream_add_event(info, start, stop, &iwe, IW_EV_ADDR_LEN);
/* Remaining entries will be displayed in the order we provide them */
@@ -66,17 +67,19 @@ static char *ieee80211_translate_scan(struct ieee80211_device *ieee,
iwe.u.data.flags = 1;
if (network->flags & NETWORK_EMPTY_ESSID) {
iwe.u.data.length = sizeof("<hidden>");
- start = iwe_stream_add_point(start, stop, &iwe, "<hidden>");
+ start = iwe_stream_add_point(info, start, stop,
+ &iwe, "<hidden>");
} else {
iwe.u.data.length = min(network->ssid_len, (u8) 32);
- start = iwe_stream_add_point(start, stop, &iwe, network->ssid);
+ start = iwe_stream_add_point(info, start, stop,
+ &iwe, network->ssid);
}
/* Add the protocol name */
iwe.cmd = SIOCGIWNAME;
snprintf(iwe.u.name, IFNAMSIZ, "IEEE 802.11%s",
ieee80211_modes[network->mode]);
- start = iwe_stream_add_event(start, stop, &iwe, IW_EV_CHAR_LEN);
+ start = iwe_stream_add_event(info, start, stop, &iwe, IW_EV_CHAR_LEN);
/* Add mode */
iwe.cmd = SIOCGIWMODE;
@@ -86,7 +89,8 @@ static char *ieee80211_translate_scan(struct ieee80211_device *ieee,
else
iwe.u.mode = IW_MODE_ADHOC;
- start = iwe_stream_add_event(start, stop, &iwe, IW_EV_UINT_LEN);
+ start = iwe_stream_add_event(info, start, stop,
+ &iwe, IW_EV_UINT_LEN);
}
/* Add channel and frequency */
@@ -95,7 +99,7 @@ static char *ieee80211_translate_scan(struct ieee80211_device *ieee,
iwe.u.freq.m = ieee80211_channel_to_freq(ieee, network->channel);
iwe.u.freq.e = 6;
iwe.u.freq.i = 0;
- start = iwe_stream_add_event(start, stop, &iwe, IW_EV_FREQ_LEN);
+ start = iwe_stream_add_event(info, start, stop, &iwe, IW_EV_FREQ_LEN);
/* Add encryption capability */
iwe.cmd = SIOCGIWENCODE;
@@ -104,12 +108,13 @@ static char *ieee80211_translate_scan(struct ieee80211_device *ieee,
else
iwe.u.data.flags = IW_ENCODE_DISABLED;
iwe.u.data.length = 0;
- start = iwe_stream_add_point(start, stop, &iwe, network->ssid);
+ start = iwe_stream_add_point(info, start, stop,
+ &iwe, network->ssid);
/* Add basic and extended rates */
/* Rate : stuffing multiple values in a single event require a bit
* more of magic - Jean II */
- current_val = start + IW_EV_LCP_LEN;
+ current_val = start + iwe_stream_lcp_len(info);
iwe.cmd = SIOCGIWRATE;
/* Those two flags are ignored... */
iwe.u.bitrate.fixed = iwe.u.bitrate.disabled = 0;
@@ -124,17 +129,19 @@ static char *ieee80211_translate_scan(struct ieee80211_device *ieee,
/* Bit rate given in 500 kb/s units (+ 0x80) */
iwe.u.bitrate.value = ((rate & 0x7f) * 500000);
/* Add new value to event */
- current_val = iwe_stream_add_value(start, current_val, stop, &iwe, IW_EV_PARAM_LEN);
+ current_val = iwe_stream_add_value(info, start, current_val,
+ stop, &iwe, IW_EV_PARAM_LEN);
}
for (; j < network->rates_ex_len; j++) {
rate = network->rates_ex[j] & 0x7F;
/* Bit rate given in 500 kb/s units (+ 0x80) */
iwe.u.bitrate.value = ((rate & 0x7f) * 500000);
/* Add new value to event */
- current_val = iwe_stream_add_value(start, current_val, stop, &iwe, IW_EV_PARAM_LEN);
+ current_val = iwe_stream_add_value(info, start, current_val,
+ stop, &iwe, IW_EV_PARAM_LEN);
}
/* Check if we added any rate */
- if((current_val - start) > IW_EV_LCP_LEN)
+ if ((current_val - start) > iwe_stream_lcp_len(info))
start = current_val;
/* Add quality statistics */
@@ -181,14 +188,14 @@ static char *ieee80211_translate_scan(struct ieee80211_device *ieee,
iwe.u.qual.level = network->stats.signal;
}
- start = iwe_stream_add_event(start, stop, &iwe, IW_EV_QUAL_LEN);
+ start = iwe_stream_add_event(info, start, stop, &iwe, IW_EV_QUAL_LEN);
iwe.cmd = IWEVCUSTOM;
p = custom;
iwe.u.data.length = p - custom;
if (iwe.u.data.length)
- start = iwe_stream_add_point(start, stop, &iwe, custom);
+ start = iwe_stream_add_point(info, start, stop, &iwe, custom);
memset(&iwe, 0, sizeof(iwe));
if (network->wpa_ie_len) {
@@ -196,7 +203,7 @@ static char *ieee80211_translate_scan(struct ieee80211_device *ieee,
memcpy(buf, network->wpa_ie, network->wpa_ie_len);
iwe.cmd = IWEVGENIE;
iwe.u.data.length = network->wpa_ie_len;
- start = iwe_stream_add_point(start, stop, &iwe, buf);
+ start = iwe_stream_add_point(info, start, stop, &iwe, buf);
}
memset(&iwe, 0, sizeof(iwe));
@@ -205,7 +212,7 @@ static char *ieee80211_translate_scan(struct ieee80211_device *ieee,
memcpy(buf, network->rsn_ie, network->rsn_ie_len);
iwe.cmd = IWEVGENIE;
iwe.u.data.length = network->rsn_ie_len;
- start = iwe_stream_add_point(start, stop, &iwe, buf);
+ start = iwe_stream_add_point(info, start, stop, &iwe, buf);
}
/* Add EXTRA: Age to display seconds since last beacon/probe response
@@ -217,7 +224,7 @@ static char *ieee80211_translate_scan(struct ieee80211_device *ieee,
jiffies_to_msecs(jiffies - network->last_scanned));
iwe.u.data.length = p - custom;
if (iwe.u.data.length)
- start = iwe_stream_add_point(start, stop, &iwe, custom);
+ start = iwe_stream_add_point(info, start, stop, &iwe, custom);
/* Add spectrum management information */
iwe.cmd = -1;
@@ -238,7 +245,7 @@ static char *ieee80211_translate_scan(struct ieee80211_device *ieee,
if (iwe.cmd == IWEVCUSTOM) {
iwe.u.data.length = p - custom;
- start = iwe_stream_add_point(start, stop, &iwe, custom);
+ start = iwe_stream_add_point(info, start, stop, &iwe, custom);
}
return start;
@@ -272,7 +279,8 @@ int ieee80211_wx_get_scan(struct ieee80211_device *ieee,
if (ieee->scan_age == 0 ||
time_after(network->last_scanned + ieee->scan_age, jiffies))
- ev = ieee80211_translate_scan(ieee, ev, stop, network);
+ ev = ieee80211_translate_scan(ieee, ev, stop, network,
+ info);
else
IEEE80211_DEBUG_SCAN("Not showing network '%s ("
"%s)' due to age (%dms).\n",
@@ -744,98 +752,9 @@ int ieee80211_wx_get_encodeext(struct ieee80211_device *ieee,
return 0;
}
-int ieee80211_wx_set_auth(struct net_device *dev,
- struct iw_request_info *info,
- union iwreq_data *wrqu,
- char *extra)
-{
- struct ieee80211_device *ieee = netdev_priv(dev);
- unsigned long flags;
- int err = 0;
-
- spin_lock_irqsave(&ieee->lock, flags);
-
- switch (wrqu->param.flags & IW_AUTH_INDEX) {
- case IW_AUTH_WPA_VERSION:
- case IW_AUTH_CIPHER_PAIRWISE:
- case IW_AUTH_CIPHER_GROUP:
- case IW_AUTH_KEY_MGMT:
- /*
- * Host AP driver does not use these parameters and allows
- * wpa_supplicant to control them internally.
- */
- break;
- case IW_AUTH_TKIP_COUNTERMEASURES:
- break; /* FIXME */
- case IW_AUTH_DROP_UNENCRYPTED:
- ieee->drop_unencrypted = !!wrqu->param.value;
- break;
- case IW_AUTH_80211_AUTH_ALG:
- break; /* FIXME */
- case IW_AUTH_WPA_ENABLED:
- ieee->privacy_invoked = ieee->wpa_enabled = !!wrqu->param.value;
- break;
- case IW_AUTH_RX_UNENCRYPTED_EAPOL:
- ieee->ieee802_1x = !!wrqu->param.value;
- break;
- case IW_AUTH_PRIVACY_INVOKED:
- ieee->privacy_invoked = !!wrqu->param.value;
- break;
- default:
- err = -EOPNOTSUPP;
- break;
- }
- spin_unlock_irqrestore(&ieee->lock, flags);
- return err;
-}
-
-int ieee80211_wx_get_auth(struct net_device *dev,
- struct iw_request_info *info,
- union iwreq_data *wrqu,
- char *extra)
-{
- struct ieee80211_device *ieee = netdev_priv(dev);
- unsigned long flags;
- int err = 0;
-
- spin_lock_irqsave(&ieee->lock, flags);
-
- switch (wrqu->param.flags & IW_AUTH_INDEX) {
- case IW_AUTH_WPA_VERSION:
- case IW_AUTH_CIPHER_PAIRWISE:
- case IW_AUTH_CIPHER_GROUP:
- case IW_AUTH_KEY_MGMT:
- case IW_AUTH_TKIP_COUNTERMEASURES: /* FIXME */
- case IW_AUTH_80211_AUTH_ALG: /* FIXME */
- /*
- * Host AP driver does not use these parameters and allows
- * wpa_supplicant to control them internally.
- */
- err = -EOPNOTSUPP;
- break;
- case IW_AUTH_DROP_UNENCRYPTED:
- wrqu->param.value = ieee->drop_unencrypted;
- break;
- case IW_AUTH_WPA_ENABLED:
- wrqu->param.value = ieee->wpa_enabled;
- break;
- case IW_AUTH_RX_UNENCRYPTED_EAPOL:
- wrqu->param.value = ieee->ieee802_1x;
- break;
- default:
- err = -EOPNOTSUPP;
- break;
- }
- spin_unlock_irqrestore(&ieee->lock, flags);
- return err;
-}
-
EXPORT_SYMBOL(ieee80211_wx_set_encodeext);
EXPORT_SYMBOL(ieee80211_wx_get_encodeext);
EXPORT_SYMBOL(ieee80211_wx_get_scan);
EXPORT_SYMBOL(ieee80211_wx_set_encode);
EXPORT_SYMBOL(ieee80211_wx_get_encode);
-
-EXPORT_SYMBOL_GPL(ieee80211_wx_set_auth);
-EXPORT_SYMBOL_GPL(ieee80211_wx_get_auth);
diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c
index 24eca23c2db3..dd919d84285f 100644
--- a/net/ipv4/af_inet.c
+++ b/net/ipv4/af_inet.c
@@ -5,8 +5,6 @@
*
* PF_INET protocol family socket handler.
*
- * Version: $Id: af_inet.c,v 1.137 2002/02/01 22:01:03 davem Exp $
- *
* Authors: Ross Biro
* Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
* Florian La Roche, <flla@stud.uni-sb.de>
@@ -112,12 +110,11 @@
#include <net/ipip.h>
#include <net/inet_common.h>
#include <net/xfrm.h>
+#include <net/net_namespace.h>
#ifdef CONFIG_IP_MROUTE
#include <linux/mroute.h>
#endif
-DEFINE_SNMP_STAT(struct linux_mib, net_statistics) __read_mostly;
-
extern void ip_mc_drop_socket(struct sock *sk);
/* The inetsw table contains everything that inet_create needs to
@@ -1341,50 +1338,70 @@ static struct net_protocol icmp_protocol = {
.netns_ok = 1,
};
-static int __init init_ipv4_mibs(void)
+static __net_init int ipv4_mib_init_net(struct net *net)
{
- if (snmp_mib_init((void **)net_statistics,
- sizeof(struct linux_mib)) < 0)
- goto err_net_mib;
- if (snmp_mib_init((void **)ip_statistics,
- sizeof(struct ipstats_mib)) < 0)
- goto err_ip_mib;
- if (snmp_mib_init((void **)icmp_statistics,
- sizeof(struct icmp_mib)) < 0)
- goto err_icmp_mib;
- if (snmp_mib_init((void **)icmpmsg_statistics,
- sizeof(struct icmpmsg_mib)) < 0)
- goto err_icmpmsg_mib;
- if (snmp_mib_init((void **)tcp_statistics,
+ if (snmp_mib_init((void **)net->mib.tcp_statistics,
sizeof(struct tcp_mib)) < 0)
goto err_tcp_mib;
- if (snmp_mib_init((void **)udp_statistics,
+ if (snmp_mib_init((void **)net->mib.ip_statistics,
+ sizeof(struct ipstats_mib)) < 0)
+ goto err_ip_mib;
+ if (snmp_mib_init((void **)net->mib.net_statistics,
+ sizeof(struct linux_mib)) < 0)
+ goto err_net_mib;
+ if (snmp_mib_init((void **)net->mib.udp_statistics,
sizeof(struct udp_mib)) < 0)
goto err_udp_mib;
- if (snmp_mib_init((void **)udplite_statistics,
+ if (snmp_mib_init((void **)net->mib.udplite_statistics,
sizeof(struct udp_mib)) < 0)
goto err_udplite_mib;
+ if (snmp_mib_init((void **)net->mib.icmp_statistics,
+ sizeof(struct icmp_mib)) < 0)
+ goto err_icmp_mib;
+ if (snmp_mib_init((void **)net->mib.icmpmsg_statistics,
+ sizeof(struct icmpmsg_mib)) < 0)
+ goto err_icmpmsg_mib;
- tcp_mib_init();
-
+ tcp_mib_init(net);
return 0;
-err_udplite_mib:
- snmp_mib_free((void **)udp_statistics);
-err_udp_mib:
- snmp_mib_free((void **)tcp_statistics);
-err_tcp_mib:
- snmp_mib_free((void **)icmpmsg_statistics);
err_icmpmsg_mib:
- snmp_mib_free((void **)icmp_statistics);
+ snmp_mib_free((void **)net->mib.icmp_statistics);
err_icmp_mib:
- snmp_mib_free((void **)ip_statistics);
-err_ip_mib:
- snmp_mib_free((void **)net_statistics);
+ snmp_mib_free((void **)net->mib.udplite_statistics);
+err_udplite_mib:
+ snmp_mib_free((void **)net->mib.udp_statistics);
+err_udp_mib:
+ snmp_mib_free((void **)net->mib.net_statistics);
err_net_mib:
+ snmp_mib_free((void **)net->mib.ip_statistics);
+err_ip_mib:
+ snmp_mib_free((void **)net->mib.tcp_statistics);
+err_tcp_mib:
return -ENOMEM;
}
+static __net_exit void ipv4_mib_exit_net(struct net *net)
+{
+ snmp_mib_free((void **)net->mib.icmpmsg_statistics);
+ snmp_mib_free((void **)net->mib.icmp_statistics);
+ snmp_mib_free((void **)net->mib.udplite_statistics);
+ snmp_mib_free((void **)net->mib.udp_statistics);
+ snmp_mib_free((void **)net->mib.net_statistics);
+ snmp_mib_free((void **)net->mib.ip_statistics);
+ snmp_mib_free((void **)net->mib.tcp_statistics);
+}
+
+static __net_initdata struct pernet_operations ipv4_mib_ops = {
+ .init = ipv4_mib_init_net,
+ .exit = ipv4_mib_exit_net,
+};
+
+static int __init init_ipv4_mibs(void)
+{
+ return register_pernet_subsys(&ipv4_mib_ops);
+}
+
static int ipv4_proc_init(void);
/*
@@ -1481,14 +1498,15 @@ static int __init inet_init(void)
* Initialise the multicast router
*/
#if defined(CONFIG_IP_MROUTE)
- ip_mr_init();
+ if (ip_mr_init())
+ printk(KERN_CRIT "inet_init: Cannot init ipv4 mroute\n");
#endif
/*
* Initialise per-cpu ipv4 mibs
*/
if (init_ipv4_mibs())
- printk(KERN_CRIT "inet_init: Cannot init ipv4 mibs\n"); ;
+ printk(KERN_CRIT "inet_init: Cannot init ipv4 mibs\n");
ipv4_proc_init();
@@ -1560,5 +1578,4 @@ EXPORT_SYMBOL(inet_sock_destruct);
EXPORT_SYMBOL(inet_stream_connect);
EXPORT_SYMBOL(inet_stream_ops);
EXPORT_SYMBOL(inet_unregister_protosw);
-EXPORT_SYMBOL(net_statistics);
EXPORT_SYMBOL(sysctl_ip_nonlocal_bind);
diff --git a/net/ipv4/arp.c b/net/ipv4/arp.c
index 9b539fa9fe18..b043eda60b04 100644
--- a/net/ipv4/arp.c
+++ b/net/ipv4/arp.c
@@ -1,7 +1,5 @@
/* linux/net/ipv4/arp.c
*
- * Version: $Id: arp.c,v 1.99 2001/08/30 22:55:42 davem Exp $
- *
* Copyright (C) 1994 by Florian La Roche
*
* This module implements the Address Resolution Protocol ARP (RFC 826),
@@ -423,11 +421,12 @@ static int arp_filter(__be32 sip, __be32 tip, struct net_device *dev)
struct rtable *rt;
int flag = 0;
/*unsigned long now; */
+ struct net *net = dev_net(dev);
- if (ip_route_output_key(dev_net(dev), &rt, &fl) < 0)
+ if (ip_route_output_key(net, &rt, &fl) < 0)
return 1;
if (rt->u.dst.dev != dev) {
- NET_INC_STATS_BH(LINUX_MIB_ARPFILTER);
+ NET_INC_STATS_BH(net, LINUX_MIB_ARPFILTER);
flag = 1;
}
ip_rt_put(rt);
@@ -1199,7 +1198,7 @@ static int arp_netdev_event(struct notifier_block *this, unsigned long event, vo
switch (event) {
case NETDEV_CHANGEADDR:
neigh_changeaddr(&arp_tbl, dev);
- rt_cache_flush(0);
+ rt_cache_flush(dev_net(dev), 0);
break;
default:
break;
diff --git a/net/ipv4/datagram.c b/net/ipv4/datagram.c
index 0c0c73f368ce..5e6c5a0f3fde 100644
--- a/net/ipv4/datagram.c
+++ b/net/ipv4/datagram.c
@@ -52,7 +52,7 @@ int ip4_datagram_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
inet->sport, usin->sin_port, sk, 1);
if (err) {
if (err == -ENETUNREACH)
- IP_INC_STATS_BH(IPSTATS_MIB_OUTNOROUTES);
+ IP_INC_STATS_BH(sock_net(sk), IPSTATS_MIB_OUTNOROUTES);
return err;
}
diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c
index 79a7ef6209ff..2e667e2f90df 100644
--- a/net/ipv4/devinet.c
+++ b/net/ipv4/devinet.c
@@ -1,8 +1,6 @@
/*
* NET3 IP device support routines.
*
- * Version: $Id: devinet.c,v 1.44 2001/10/31 21:55:54 davem Exp $
- *
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
* as published by the Free Software Foundation; either version
@@ -170,6 +168,8 @@ static struct in_device *inetdev_init(struct net_device *dev)
in_dev->dev = dev;
if ((in_dev->arp_parms = neigh_parms_alloc(dev, &arp_tbl)) == NULL)
goto out_kfree;
+ if (IPV4_DEVCONF(in_dev->cnf, FORWARDING))
+ dev_disable_lro(dev);
/* Reference in_dev->dev */
dev_hold(dev);
/* Account for reference dev->ip_ptr (below) */
@@ -1013,7 +1013,7 @@ static void inetdev_changename(struct net_device *dev, struct in_device *in_dev)
memcpy(old, ifa->ifa_label, IFNAMSIZ);
memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
if (named++ == 0)
- continue;
+ goto skip;
dot = strchr(old, ':');
if (dot == NULL) {
sprintf(old, ":%d", named);
@@ -1024,6 +1024,8 @@ static void inetdev_changename(struct net_device *dev, struct in_device *in_dev)
} else {
strcpy(ifa->ifa_label + (IFNAMSIZ - strlen(dot) - 1), dot);
}
+skip:
+ rtmsg_ifa(RTM_NEWADDR, ifa, NULL, 0);
}
}
@@ -1241,6 +1243,8 @@ static void inet_forward_change(struct net *net)
read_lock(&dev_base_lock);
for_each_netdev(net, dev) {
struct in_device *in_dev;
+ if (on)
+ dev_disable_lro(dev);
rcu_read_lock();
in_dev = __in_dev_get_rcu(dev);
if (in_dev)
@@ -1248,8 +1252,6 @@ static void inet_forward_change(struct net *net)
rcu_read_unlock();
}
read_unlock(&dev_base_lock);
-
- rt_cache_flush(0);
}
static int devinet_conf_proc(ctl_table *ctl, int write,
@@ -1335,10 +1337,19 @@ static int devinet_sysctl_forward(ctl_table *ctl, int write,
if (write && *valp != val) {
struct net *net = ctl->extra2;
- if (valp == &IPV4_DEVCONF_ALL(net, FORWARDING))
- inet_forward_change(net);
- else if (valp != &IPV4_DEVCONF_DFLT(net, FORWARDING))
- rt_cache_flush(0);
+ if (valp != &IPV4_DEVCONF_DFLT(net, FORWARDING)) {
+ rtnl_lock();
+ if (valp == &IPV4_DEVCONF_ALL(net, FORWARDING)) {
+ inet_forward_change(net);
+ } else if (*valp) {
+ struct ipv4_devconf *cnf = ctl->extra1;
+ struct in_device *idev =
+ container_of(cnf, struct in_device, cnf);
+ dev_disable_lro(idev->dev);
+ }
+ rtnl_unlock();
+ rt_cache_flush(net, 0);
+ }
}
return ret;
@@ -1351,9 +1362,10 @@ int ipv4_doint_and_flush(ctl_table *ctl, int write,
int *valp = ctl->data;
int val = *valp;
int ret = proc_dointvec(ctl, write, filp, buffer, lenp, ppos);
+ struct net *net = ctl->extra2;
if (write && *valp != val)
- rt_cache_flush(0);
+ rt_cache_flush(net, 0);
return ret;
}
@@ -1364,9 +1376,10 @@ int ipv4_doint_and_flush_strategy(ctl_table *table, int __user *name, int nlen,
{
int ret = devinet_conf_sysctl(table, name, nlen, oldval, oldlenp,
newval, newlen);
+ struct net *net = table->extra2;
if (ret == 1)
- rt_cache_flush(0);
+ rt_cache_flush(net, 0);
return ret;
}
diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c
index 0b2ac6a3d903..65c1503f8cc8 100644
--- a/net/ipv4/fib_frontend.c
+++ b/net/ipv4/fib_frontend.c
@@ -5,8 +5,6 @@
*
* IPv4 Forwarding Information Base: FIB frontend.
*
- * Version: $Id: fib_frontend.c,v 1.26 2001/10/31 21:55:54 davem Exp $
- *
* Authors: Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
*
* This program is free software; you can redistribute it and/or
@@ -146,7 +144,7 @@ static void fib_flush(struct net *net)
}
if (flushed)
- rt_cache_flush(-1);
+ rt_cache_flush(net, -1);
}
/*
@@ -899,21 +897,22 @@ static void fib_disable_ip(struct net_device *dev, int force)
{
if (fib_sync_down_dev(dev, force))
fib_flush(dev_net(dev));
- rt_cache_flush(0);
+ rt_cache_flush(dev_net(dev), 0);
arp_ifdown(dev);
}
static int fib_inetaddr_event(struct notifier_block *this, unsigned long event, void *ptr)
{
struct in_ifaddr *ifa = (struct in_ifaddr*)ptr;
+ struct net_device *dev = ifa->ifa_dev->dev;
switch (event) {
case NETDEV_UP:
fib_add_ifaddr(ifa);
#ifdef CONFIG_IP_ROUTE_MULTIPATH
- fib_sync_up(ifa->ifa_dev->dev);
+ fib_sync_up(dev);
#endif
- rt_cache_flush(-1);
+ rt_cache_flush(dev_net(dev), -1);
break;
case NETDEV_DOWN:
fib_del_ifaddr(ifa);
@@ -921,9 +920,9 @@ static int fib_inetaddr_event(struct notifier_block *this, unsigned long event,
/* Last address was deleted from this interface.
Disable IP.
*/
- fib_disable_ip(ifa->ifa_dev->dev, 1);
+ fib_disable_ip(dev, 1);
} else {
- rt_cache_flush(-1);
+ rt_cache_flush(dev_net(dev), -1);
}
break;
}
@@ -951,14 +950,14 @@ static int fib_netdev_event(struct notifier_block *this, unsigned long event, vo
#ifdef CONFIG_IP_ROUTE_MULTIPATH
fib_sync_up(dev);
#endif
- rt_cache_flush(-1);
+ rt_cache_flush(dev_net(dev), -1);
break;
case NETDEV_DOWN:
fib_disable_ip(dev, 0);
break;
case NETDEV_CHANGEMTU:
case NETDEV_CHANGE:
- rt_cache_flush(0);
+ rt_cache_flush(dev_net(dev), 0);
break;
}
return NOTIFY_DONE;
diff --git a/net/ipv4/fib_hash.c b/net/ipv4/fib_hash.c
index 2e2fc3376ac9..c8cac6c7f881 100644
--- a/net/ipv4/fib_hash.c
+++ b/net/ipv4/fib_hash.c
@@ -5,8 +5,6 @@
*
* IPv4 FIB: lookup engine and maintenance routines.
*
- * Version: $Id: fib_hash.c,v 1.13 2001/10/31 21:55:54 davem Exp $
- *
* Authors: Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
*
* This program is free software; you can redistribute it and/or
@@ -474,7 +472,7 @@ static int fn_hash_insert(struct fib_table *tb, struct fib_config *cfg)
fib_release_info(fi_drop);
if (state & FA_S_ACCESSED)
- rt_cache_flush(-1);
+ rt_cache_flush(cfg->fc_nlinfo.nl_net, -1);
rtmsg_fib(RTM_NEWROUTE, key, fa, cfg->fc_dst_len, tb->tb_id,
&cfg->fc_nlinfo, NLM_F_REPLACE);
return 0;
@@ -534,7 +532,7 @@ static int fn_hash_insert(struct fib_table *tb, struct fib_config *cfg)
if (new_f)
fz->fz_nent++;
- rt_cache_flush(-1);
+ rt_cache_flush(cfg->fc_nlinfo.nl_net, -1);
rtmsg_fib(RTM_NEWROUTE, key, new_fa, cfg->fc_dst_len, tb->tb_id,
&cfg->fc_nlinfo, 0);
@@ -616,7 +614,7 @@ static int fn_hash_delete(struct fib_table *tb, struct fib_config *cfg)
write_unlock_bh(&fib_hash_lock);
if (fa->fa_state & FA_S_ACCESSED)
- rt_cache_flush(-1);
+ rt_cache_flush(cfg->fc_nlinfo.nl_net, -1);
fn_free_alias(fa, f);
if (kill_fn) {
fn_free_node(f);
diff --git a/net/ipv4/fib_rules.c b/net/ipv4/fib_rules.c
index 1fb56876be54..6080d7120821 100644
--- a/net/ipv4/fib_rules.c
+++ b/net/ipv4/fib_rules.c
@@ -258,9 +258,9 @@ static size_t fib4_rule_nlmsg_payload(struct fib_rule *rule)
+ nla_total_size(4); /* flow */
}
-static void fib4_rule_flush_cache(void)
+static void fib4_rule_flush_cache(struct fib_rules_ops *ops)
{
- rt_cache_flush(-1);
+ rt_cache_flush(ops->fro_net, -1);
}
static struct fib_rules_ops fib4_rules_ops_template = {
diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c
index 0d4d72827e4b..ded2ae34eab1 100644
--- a/net/ipv4/fib_semantics.c
+++ b/net/ipv4/fib_semantics.c
@@ -5,8 +5,6 @@
*
* IPv4 Forwarding Information Base: semantics.
*
- * Version: $Id: fib_semantics.c,v 1.19 2002/01/12 07:54:56 davem Exp $
- *
* Authors: Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
*
* This program is free software; you can redistribute it and/or
diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c
index 4b02d14e7ab9..5cb72786a8af 100644
--- a/net/ipv4/fib_trie.c
+++ b/net/ipv4/fib_trie.c
@@ -22,8 +22,6 @@
* IP-address lookup using LC-tries. Stefan Nilsson and Gunnar Karlsson
* IEEE Journal on Selected Areas in Communications, 17(6):1083-1092, June 1999
*
- * Version: $Id: fib_trie.c,v 1.3 2005/06/08 14:20:01 robert Exp $
- *
*
* Code from fib_hash has been reused which includes the following header:
*
@@ -1273,7 +1271,7 @@ static int fn_trie_insert(struct fib_table *tb, struct fib_config *cfg)
fib_release_info(fi_drop);
if (state & FA_S_ACCESSED)
- rt_cache_flush(-1);
+ rt_cache_flush(cfg->fc_nlinfo.nl_net, -1);
rtmsg_fib(RTM_NEWROUTE, htonl(key), new_fa, plen,
tb->tb_id, &cfg->fc_nlinfo, NLM_F_REPLACE);
@@ -1318,7 +1316,7 @@ static int fn_trie_insert(struct fib_table *tb, struct fib_config *cfg)
list_add_tail_rcu(&new_fa->fa_list,
(fa ? &fa->fa_list : fa_head));
- rt_cache_flush(-1);
+ rt_cache_flush(cfg->fc_nlinfo.nl_net, -1);
rtmsg_fib(RTM_NEWROUTE, htonl(key), new_fa, plen, tb->tb_id,
&cfg->fc_nlinfo, 0);
succeeded:
@@ -1359,17 +1357,17 @@ static int check_leaf(struct trie *t, struct leaf *l,
t->stats.semantic_match_miss++;
#endif
if (err <= 0)
- return plen;
+ return err;
}
- return -1;
+ return 1;
}
static int fn_trie_lookup(struct fib_table *tb, const struct flowi *flp,
struct fib_result *res)
{
struct trie *t = (struct trie *) tb->tb_data;
- int plen, ret = 0;
+ int ret;
struct node *n;
struct tnode *pn;
int pos, bits;
@@ -1393,10 +1391,7 @@ static int fn_trie_lookup(struct fib_table *tb, const struct flowi *flp,
/* Just a leaf? */
if (IS_LEAF(n)) {
- plen = check_leaf(t, (struct leaf *)n, key, flp, res);
- if (plen < 0)
- goto failed;
- ret = 0;
+ ret = check_leaf(t, (struct leaf *)n, key, flp, res);
goto found;
}
@@ -1421,11 +1416,9 @@ static int fn_trie_lookup(struct fib_table *tb, const struct flowi *flp,
}
if (IS_LEAF(n)) {
- plen = check_leaf(t, (struct leaf *)n, key, flp, res);
- if (plen < 0)
+ ret = check_leaf(t, (struct leaf *)n, key, flp, res);
+ if (ret > 0)
goto backtrace;
-
- ret = 0;
goto found;
}
@@ -1666,7 +1659,7 @@ static int fn_trie_delete(struct fib_table *tb, struct fib_config *cfg)
trie_leaf_remove(t, l);
if (fa->fa_state & FA_S_ACCESSED)
- rt_cache_flush(-1);
+ rt_cache_flush(cfg->fc_nlinfo.nl_net, -1);
fib_release_info(fa->fa_info);
alias_free_mem_rcu(fa);
@@ -2258,25 +2251,7 @@ static int fib_triestat_seq_show(struct seq_file *seq, void *v)
static int fib_triestat_seq_open(struct inode *inode, struct file *file)
{
- int err;
- struct net *net;
-
- net = get_proc_net(inode);
- if (net == NULL)
- return -ENXIO;
- err = single_open(file, fib_triestat_seq_show, net);
- if (err < 0) {
- put_net(net);
- return err;
- }
- return 0;
-}
-
-static int fib_triestat_seq_release(struct inode *ino, struct file *f)
-{
- struct seq_file *seq = f->private_data;
- put_net(seq->private);
- return single_release(ino, f);
+ return single_open_net(inode, file, fib_triestat_seq_show);
}
static const struct file_operations fib_triestat_fops = {
@@ -2284,7 +2259,7 @@ static const struct file_operations fib_triestat_fops = {
.open = fib_triestat_seq_open,
.read = seq_read,
.llseek = seq_lseek,
- .release = fib_triestat_seq_release,
+ .release = single_release_net,
};
static struct node *fib_trie_get_idx(struct seq_file *seq, loff_t pos)
diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c
index 87397351ddac..860558633b2c 100644
--- a/net/ipv4/icmp.c
+++ b/net/ipv4/icmp.c
@@ -3,8 +3,6 @@
*
* Alan Cox, <alan@redhat.com>
*
- * Version: $Id: icmp.c,v 1.85 2002/02/01 22:01:03 davem Exp $
- *
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
* as published by the Free Software Foundation; either version
@@ -113,12 +111,6 @@ struct icmp_bxm {
unsigned char optbuf[40];
};
-/*
- * Statistics
- */
-DEFINE_SNMP_STAT(struct icmp_mib, icmp_statistics) __read_mostly;
-DEFINE_SNMP_STAT(struct icmpmsg_mib, icmpmsg_statistics) __read_mostly;
-
/* An array of errno for error messages from dest unreach. */
/* RFC 1122: 3.2.2.1 States that NET_UNREACH, HOST_UNREACH and SR_FAILED MUST be considered 'transient errs'. */
@@ -298,10 +290,10 @@ out:
/*
* Maintain the counters used in the SNMP statistics for outgoing ICMP
*/
-void icmp_out_count(unsigned char type)
+void icmp_out_count(struct net *net, unsigned char type)
{
- ICMPMSGOUT_INC_STATS(type);
- ICMP_INC_STATS(ICMP_MIB_OUTMSGS);
+ ICMPMSGOUT_INC_STATS(net, type);
+ ICMP_INC_STATS(net, ICMP_MIB_OUTMSGS);
}
/*
@@ -765,7 +757,7 @@ static void icmp_unreach(struct sk_buff *skb)
out:
return;
out_err:
- ICMP_INC_STATS_BH(ICMP_MIB_INERRORS);
+ ICMP_INC_STATS_BH(net, ICMP_MIB_INERRORS);
goto out;
}
@@ -805,7 +797,7 @@ static void icmp_redirect(struct sk_buff *skb)
out:
return;
out_err:
- ICMP_INC_STATS_BH(ICMP_MIB_INERRORS);
+ ICMP_INC_STATS_BH(dev_net(skb->dev), ICMP_MIB_INERRORS);
goto out;
}
@@ -876,7 +868,7 @@ static void icmp_timestamp(struct sk_buff *skb)
out:
return;
out_err:
- ICMP_INC_STATS_BH(ICMP_MIB_INERRORS);
+ ICMP_INC_STATS_BH(dev_net(skb->dst->dev), ICMP_MIB_INERRORS);
goto out;
}
@@ -975,6 +967,7 @@ int icmp_rcv(struct sk_buff *skb)
{
struct icmphdr *icmph;
struct rtable *rt = skb->rtable;
+ struct net *net = dev_net(rt->u.dst.dev);
if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb)) {
int nh;
@@ -995,7 +988,7 @@ int icmp_rcv(struct sk_buff *skb)
skb_set_network_header(skb, nh);
}
- ICMP_INC_STATS_BH(ICMP_MIB_INMSGS);
+ ICMP_INC_STATS_BH(net, ICMP_MIB_INMSGS);
switch (skb->ip_summed) {
case CHECKSUM_COMPLETE:
@@ -1013,7 +1006,7 @@ int icmp_rcv(struct sk_buff *skb)
icmph = icmp_hdr(skb);
- ICMPMSGIN_INC_STATS_BH(icmph->type);
+ ICMPMSGIN_INC_STATS_BH(net, icmph->type);
/*
* 18 is the highest 'known' ICMP type. Anything else is a mystery
*
@@ -1029,9 +1022,6 @@ int icmp_rcv(struct sk_buff *skb)
*/
if (rt->rt_flags & (RTCF_BROADCAST | RTCF_MULTICAST)) {
- struct net *net;
-
- net = dev_net(rt->u.dst.dev);
/*
* RFC 1122: 3.2.2.6 An ICMP_ECHO to broadcast MAY be
* silently ignored (we let user decide with a sysctl).
@@ -1057,7 +1047,7 @@ drop:
kfree_skb(skb);
return 0;
error:
- ICMP_INC_STATS_BH(ICMP_MIB_INERRORS);
+ ICMP_INC_STATS_BH(net, ICMP_MIB_INERRORS);
goto drop;
}
@@ -1217,5 +1207,4 @@ int __init icmp_init(void)
EXPORT_SYMBOL(icmp_err_convert);
EXPORT_SYMBOL(icmp_send);
-EXPORT_SYMBOL(icmp_statistics);
EXPORT_SYMBOL(xrlim_allow);
diff --git a/net/ipv4/igmp.c b/net/ipv4/igmp.c
index 2769dc4a4c84..68e84a933e90 100644
--- a/net/ipv4/igmp.c
+++ b/net/ipv4/igmp.c
@@ -8,8 +8,6 @@
* the older version didn't come out right using gcc 2.5.8, the newer one
* seems to fall out with gcc 2.6.2.
*
- * Version: $Id: igmp.c,v 1.47 2002/02/01 22:01:03 davem Exp $
- *
* Authors:
* Alan Cox <Alan.Cox@linux.org>
*
diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c
index ec834480abe7..bb81c958b744 100644
--- a/net/ipv4/inet_connection_sock.c
+++ b/net/ipv4/inet_connection_sock.c
@@ -103,7 +103,8 @@ int inet_csk_get_port(struct sock *sk, unsigned short snum)
rover = net_random() % remaining + low;
do {
- head = &hashinfo->bhash[inet_bhashfn(rover, hashinfo->bhash_size)];
+ head = &hashinfo->bhash[inet_bhashfn(net, rover,
+ hashinfo->bhash_size)];
spin_lock(&head->lock);
inet_bind_bucket_for_each(tb, node, &head->chain)
if (tb->ib_net == net && tb->port == rover)
@@ -130,7 +131,8 @@ int inet_csk_get_port(struct sock *sk, unsigned short snum)
*/
snum = rover;
} else {
- head = &hashinfo->bhash[inet_bhashfn(snum, hashinfo->bhash_size)];
+ head = &hashinfo->bhash[inet_bhashfn(net, snum,
+ hashinfo->bhash_size)];
spin_lock(&head->lock);
inet_bind_bucket_for_each(tb, node, &head->chain)
if (tb->ib_net == net && tb->port == snum)
@@ -336,15 +338,16 @@ struct dst_entry* inet_csk_route_req(struct sock *sk,
.uli_u = { .ports =
{ .sport = inet_sk(sk)->sport,
.dport = ireq->rmt_port } } };
+ struct net *net = sock_net(sk);
security_req_classify_flow(req, &fl);
- if (ip_route_output_flow(sock_net(sk), &rt, &fl, sk, 0)) {
- IP_INC_STATS_BH(IPSTATS_MIB_OUTNOROUTES);
+ if (ip_route_output_flow(net, &rt, &fl, sk, 0)) {
+ IP_INC_STATS_BH(net, IPSTATS_MIB_OUTNOROUTES);
return NULL;
}
if (opt && opt->is_strictroute && rt->rt_dst != rt->rt_gateway) {
ip_rt_put(rt);
- IP_INC_STATS_BH(IPSTATS_MIB_OUTNOROUTES);
+ IP_INC_STATS_BH(net, IPSTATS_MIB_OUTNOROUTES);
return NULL;
}
return &rt->u.dst;
diff --git a/net/ipv4/inet_diag.c b/net/ipv4/inet_diag.c
index da97695e7096..c10036e7a463 100644
--- a/net/ipv4/inet_diag.c
+++ b/net/ipv4/inet_diag.c
@@ -1,8 +1,6 @@
/*
* inet_diag.c Module for monitoring INET transport protocols sockets.
*
- * Version: $Id: inet_diag.c,v 1.3 2002/02/01 22:01:04 davem Exp $
- *
* Authors: Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
*
* This program is free software; you can redistribute it and/or
diff --git a/net/ipv4/inet_hashtables.c b/net/ipv4/inet_hashtables.c
index 2023d37b2708..115f53722d20 100644
--- a/net/ipv4/inet_hashtables.c
+++ b/net/ipv4/inet_hashtables.c
@@ -70,7 +70,8 @@ void inet_bind_hash(struct sock *sk, struct inet_bind_bucket *tb,
static void __inet_put_port(struct sock *sk)
{
struct inet_hashinfo *hashinfo = sk->sk_prot->h.hashinfo;
- const int bhash = inet_bhashfn(inet_sk(sk)->num, hashinfo->bhash_size);
+ const int bhash = inet_bhashfn(sock_net(sk), inet_sk(sk)->num,
+ hashinfo->bhash_size);
struct inet_bind_hashbucket *head = &hashinfo->bhash[bhash];
struct inet_bind_bucket *tb;
@@ -95,7 +96,8 @@ EXPORT_SYMBOL(inet_put_port);
void __inet_inherit_port(struct sock *sk, struct sock *child)
{
struct inet_hashinfo *table = sk->sk_prot->h.hashinfo;
- const int bhash = inet_bhashfn(inet_sk(child)->num, table->bhash_size);
+ const int bhash = inet_bhashfn(sock_net(sk), inet_sk(child)->num,
+ table->bhash_size);
struct inet_bind_hashbucket *head = &table->bhash[bhash];
struct inet_bind_bucket *tb;
@@ -192,7 +194,7 @@ struct sock *__inet_lookup_listener(struct net *net,
const struct hlist_head *head;
read_lock(&hashinfo->lhash_lock);
- head = &hashinfo->listening_hash[inet_lhashfn(hnum)];
+ head = &hashinfo->listening_hash[inet_lhashfn(net, hnum)];
if (!hlist_empty(head)) {
const struct inet_sock *inet = inet_sk((sk = __sk_head(head)));
@@ -225,7 +227,7 @@ struct sock * __inet_lookup_established(struct net *net,
/* Optimize here for direct hit, only listening connections can
* have wildcards anyways.
*/
- unsigned int hash = inet_ehashfn(daddr, hnum, saddr, sport);
+ unsigned int hash = inet_ehashfn(net, daddr, hnum, saddr, sport);
struct inet_ehash_bucket *head = inet_ehash_bucket(hashinfo, hash);
rwlock_t *lock = inet_ehash_lockp(hashinfo, hash);
@@ -265,13 +267,13 @@ static int __inet_check_established(struct inet_timewait_death_row *death_row,
int dif = sk->sk_bound_dev_if;
INET_ADDR_COOKIE(acookie, saddr, daddr)
const __portpair ports = INET_COMBINED_PORTS(inet->dport, lport);
- unsigned int hash = inet_ehashfn(daddr, lport, saddr, inet->dport);
+ struct net *net = sock_net(sk);
+ unsigned int hash = inet_ehashfn(net, daddr, lport, saddr, inet->dport);
struct inet_ehash_bucket *head = inet_ehash_bucket(hinfo, hash);
rwlock_t *lock = inet_ehash_lockp(hinfo, hash);
struct sock *sk2;
const struct hlist_node *node;
struct inet_timewait_sock *tw;
- struct net *net = sock_net(sk);
prefetch(head->chain.first);
write_lock(lock);
@@ -310,11 +312,11 @@ unique:
if (twp) {
*twp = tw;
- NET_INC_STATS_BH(LINUX_MIB_TIMEWAITRECYCLED);
+ NET_INC_STATS_BH(net, LINUX_MIB_TIMEWAITRECYCLED);
} else if (tw) {
/* Silly. Should hash-dance instead... */
inet_twsk_deschedule(tw, death_row);
- NET_INC_STATS_BH(LINUX_MIB_TIMEWAITRECYCLED);
+ NET_INC_STATS_BH(net, LINUX_MIB_TIMEWAITRECYCLED);
inet_twsk_put(tw);
}
@@ -438,7 +440,8 @@ int __inet_hash_connect(struct inet_timewait_death_row *death_row,
local_bh_disable();
for (i = 1; i <= remaining; i++) {
port = low + (i + offset) % remaining;
- head = &hinfo->bhash[inet_bhashfn(port, hinfo->bhash_size)];
+ head = &hinfo->bhash[inet_bhashfn(net, port,
+ hinfo->bhash_size)];
spin_lock(&head->lock);
/* Does not bother with rcv_saddr checks,
@@ -493,7 +496,7 @@ ok:
goto out;
}
- head = &hinfo->bhash[inet_bhashfn(snum, hinfo->bhash_size)];
+ head = &hinfo->bhash[inet_bhashfn(net, snum, hinfo->bhash_size)];
tb = inet_csk(sk)->icsk_bind_hash;
spin_lock_bh(&head->lock);
if (sk_head(&tb->owners) == sk && !sk->sk_bind_node.next) {
diff --git a/net/ipv4/inet_timewait_sock.c b/net/ipv4/inet_timewait_sock.c
index ce16e9ac24c1..75c2def8f9a0 100644
--- a/net/ipv4/inet_timewait_sock.c
+++ b/net/ipv4/inet_timewait_sock.c
@@ -32,7 +32,8 @@ static void __inet_twsk_kill(struct inet_timewait_sock *tw,
write_unlock(lock);
/* Disassociate with bind bucket. */
- bhead = &hashinfo->bhash[inet_bhashfn(tw->tw_num, hashinfo->bhash_size)];
+ bhead = &hashinfo->bhash[inet_bhashfn(twsk_net(tw), tw->tw_num,
+ hashinfo->bhash_size)];
spin_lock(&bhead->lock);
tb = tw->tw_tb;
__hlist_del(&tw->tw_bind_node);
@@ -81,7 +82,8 @@ void __inet_twsk_hashdance(struct inet_timewait_sock *tw, struct sock *sk,
Note, that any socket with inet->num != 0 MUST be bound in
binding cache, even if it is closed.
*/
- bhead = &hashinfo->bhash[inet_bhashfn(inet->num, hashinfo->bhash_size)];
+ bhead = &hashinfo->bhash[inet_bhashfn(twsk_net(tw), inet->num,
+ hashinfo->bhash_size)];
spin_lock(&bhead->lock);
tw->tw_tb = icsk->icsk_bind_hash;
BUG_TRAP(icsk->icsk_bind_hash);
@@ -158,6 +160,9 @@ rescan:
__inet_twsk_del_dead_node(tw);
spin_unlock(&twdr->death_lock);
__inet_twsk_kill(tw, twdr->hashinfo);
+#ifdef CONFIG_NET_NS
+ NET_INC_STATS_BH(twsk_net(tw), LINUX_MIB_TIMEWAITED);
+#endif
inet_twsk_put(tw);
killed++;
spin_lock(&twdr->death_lock);
@@ -176,8 +181,9 @@ rescan:
}
twdr->tw_count -= killed;
- NET_ADD_STATS_BH(LINUX_MIB_TIMEWAITED, killed);
-
+#ifndef CONFIG_NET_NS
+ NET_ADD_STATS_BH(&init_net, LINUX_MIB_TIMEWAITED, killed);
+#endif
return ret;
}
@@ -370,6 +376,9 @@ void inet_twdr_twcal_tick(unsigned long data)
&twdr->twcal_row[slot]) {
__inet_twsk_del_dead_node(tw);
__inet_twsk_kill(tw, twdr->hashinfo);
+#ifdef CONFIG_NET_NS
+ NET_INC_STATS_BH(twsk_net(tw), LINUX_MIB_TIMEWAITKILLED);
+#endif
inet_twsk_put(tw);
killed++;
}
@@ -393,7 +402,9 @@ void inet_twdr_twcal_tick(unsigned long data)
out:
if ((twdr->tw_count -= killed) == 0)
del_timer(&twdr->tw_timer);
- NET_ADD_STATS_BH(LINUX_MIB_TIMEWAITKILLED, killed);
+#ifndef CONFIG_NET_NS
+ NET_ADD_STATS_BH(&init_net, LINUX_MIB_TIMEWAITKILLED, killed);
+#endif
spin_unlock(&twdr->death_lock);
}
diff --git a/net/ipv4/inetpeer.c b/net/ipv4/inetpeer.c
index af995198f643..a456ceeac3f2 100644
--- a/net/ipv4/inetpeer.c
+++ b/net/ipv4/inetpeer.c
@@ -3,8 +3,6 @@
*
* This source is covered by the GNU GPL, the same as all kernel sources.
*
- * Version: $Id: inetpeer.c,v 1.7 2001/09/20 21:22:50 davem Exp $
- *
* Authors: Andrey V. Savochkin <saw@msu.ru>
*/
diff --git a/net/ipv4/ip_forward.c b/net/ipv4/ip_forward.c
index 4813c39b438b..450016b89a18 100644
--- a/net/ipv4/ip_forward.c
+++ b/net/ipv4/ip_forward.c
@@ -5,8 +5,6 @@
*
* The IP forwarding functionality.
*
- * Version: $Id: ip_forward.c,v 1.48 2000/12/13 18:31:48 davem Exp $
- *
* Authors: see ip.c
*
* Fixes:
@@ -44,7 +42,7 @@ static int ip_forward_finish(struct sk_buff *skb)
{
struct ip_options * opt = &(IPCB(skb)->opt);
- IP_INC_STATS_BH(IPSTATS_MIB_OUTFORWDATAGRAMS);
+ IP_INC_STATS_BH(dev_net(skb->dst->dev), IPSTATS_MIB_OUTFORWDATAGRAMS);
if (unlikely(opt->optlen))
ip_forward_options(skb);
@@ -58,6 +56,9 @@ int ip_forward(struct sk_buff *skb)
struct rtable *rt; /* Route we use */
struct ip_options * opt = &(IPCB(skb)->opt);
+ if (skb_warn_if_lro(skb))
+ goto drop;
+
if (!xfrm4_policy_check(NULL, XFRM_POLICY_FWD, skb))
goto drop;
@@ -87,7 +88,7 @@ int ip_forward(struct sk_buff *skb)
if (unlikely(skb->len > dst_mtu(&rt->u.dst) && !skb_is_gso(skb) &&
(ip_hdr(skb)->frag_off & htons(IP_DF))) && !skb->local_df) {
- IP_INC_STATS(IPSTATS_MIB_FRAGFAILS);
+ IP_INC_STATS(dev_net(rt->u.dst.dev), IPSTATS_MIB_FRAGFAILS);
icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED,
htonl(dst_mtu(&rt->u.dst)));
goto drop;
@@ -122,7 +123,7 @@ sr_failed:
too_many_hops:
/* Tell the sender its packet died... */
- IP_INC_STATS_BH(IPSTATS_MIB_INHDRERRORS);
+ IP_INC_STATS_BH(dev_net(skb->dst->dev), IPSTATS_MIB_INHDRERRORS);
icmp_send(skb, ICMP_TIME_EXCEEDED, ICMP_EXC_TTL, 0);
drop:
kfree_skb(skb);
diff --git a/net/ipv4/ip_fragment.c b/net/ipv4/ip_fragment.c
index 37221f659159..38d38f058018 100644
--- a/net/ipv4/ip_fragment.c
+++ b/net/ipv4/ip_fragment.c
@@ -5,8 +5,6 @@
*
* The IP fragmentation functionality.
*
- * Version: $Id: ip_fragment.c,v 1.59 2002/01/12 07:54:56 davem Exp $
- *
* Authors: Fred N. van Kempen <waltje@uWalt.NL.Mugnet.ORG>
* Alan Cox <Alan.Cox@linux.org>
*
@@ -180,7 +178,7 @@ static void ip_evictor(struct net *net)
evicted = inet_frag_evictor(&net->ipv4.frags, &ip4_frags);
if (evicted)
- IP_ADD_STATS_BH(IPSTATS_MIB_REASMFAILS, evicted);
+ IP_ADD_STATS_BH(net, IPSTATS_MIB_REASMFAILS, evicted);
}
/*
@@ -189,8 +187,10 @@ static void ip_evictor(struct net *net)
static void ip_expire(unsigned long arg)
{
struct ipq *qp;
+ struct net *net;
qp = container_of((struct inet_frag_queue *) arg, struct ipq, q);
+ net = container_of(qp->q.net, struct net, ipv4.frags);
spin_lock(&qp->q.lock);
@@ -199,14 +199,12 @@ static void ip_expire(unsigned long arg)
ipq_kill(qp);
- IP_INC_STATS_BH(IPSTATS_MIB_REASMTIMEOUT);
- IP_INC_STATS_BH(IPSTATS_MIB_REASMFAILS);
+ IP_INC_STATS_BH(net, IPSTATS_MIB_REASMTIMEOUT);
+ IP_INC_STATS_BH(net, IPSTATS_MIB_REASMFAILS);
if ((qp->q.last_in & INET_FRAG_FIRST_IN) && qp->q.fragments != NULL) {
struct sk_buff *head = qp->q.fragments;
- struct net *net;
- net = container_of(qp->q.net, struct net, ipv4.frags);
/* Send an ICMP "Fragment Reassembly Timeout" message. */
if ((head->dev = dev_get_by_index(net, qp->iif)) != NULL) {
icmp_send(head, ICMP_TIME_EXCEEDED, ICMP_EXC_FRAGTIME, 0);
@@ -263,7 +261,10 @@ static inline int ip_frag_too_far(struct ipq *qp)
rc = qp->q.fragments && (end - start) > max;
if (rc) {
- IP_INC_STATS_BH(IPSTATS_MIB_REASMFAILS);
+ struct net *net;
+
+ net = container_of(qp->q.net, struct net, ipv4.frags);
+ IP_INC_STATS_BH(net, IPSTATS_MIB_REASMFAILS);
}
return rc;
@@ -547,7 +548,7 @@ static int ip_frag_reasm(struct ipq *qp, struct sk_buff *prev,
iph = ip_hdr(head);
iph->frag_off = 0;
iph->tot_len = htons(len);
- IP_INC_STATS_BH(IPSTATS_MIB_REASMOKS);
+ IP_INC_STATS_BH(dev_net(dev), IPSTATS_MIB_REASMOKS);
qp->q.fragments = NULL;
return 0;
@@ -562,7 +563,7 @@ out_oversize:
"Oversized IP packet from " NIPQUAD_FMT ".\n",
NIPQUAD(qp->saddr));
out_fail:
- IP_INC_STATS_BH(IPSTATS_MIB_REASMFAILS);
+ IP_INC_STATS_BH(dev_net(dev), IPSTATS_MIB_REASMFAILS);
return err;
}
@@ -572,9 +573,9 @@ int ip_defrag(struct sk_buff *skb, u32 user)
struct ipq *qp;
struct net *net;
- IP_INC_STATS_BH(IPSTATS_MIB_REASMREQDS);
-
net = skb->dev ? dev_net(skb->dev) : dev_net(skb->dst->dev);
+ IP_INC_STATS_BH(net, IPSTATS_MIB_REASMREQDS);
+
/* Start by cleaning up the memory. */
if (atomic_read(&net->ipv4.frags.mem) > net->ipv4.frags.high_thresh)
ip_evictor(net);
@@ -592,7 +593,7 @@ int ip_defrag(struct sk_buff *skb, u32 user)
return ret;
}
- IP_INC_STATS_BH(IPSTATS_MIB_REASMFAILS);
+ IP_INC_STATS_BH(net, IPSTATS_MIB_REASMFAILS);
kfree_skb(skb);
return -ENOMEM;
}
@@ -600,7 +601,7 @@ int ip_defrag(struct sk_buff *skb, u32 user)
#ifdef CONFIG_SYSCTL
static int zero;
-static struct ctl_table ip4_frags_ctl_table[] = {
+static struct ctl_table ip4_frags_ns_ctl_table[] = {
{
.ctl_name = NET_IPV4_IPFRAG_HIGH_THRESH,
.procname = "ipfrag_high_thresh",
@@ -626,6 +627,10 @@ static struct ctl_table ip4_frags_ctl_table[] = {
.proc_handler = &proc_dointvec_jiffies,
.strategy = &sysctl_jiffies
},
+ { }
+};
+
+static struct ctl_table ip4_frags_ctl_table[] = {
{
.ctl_name = NET_IPV4_IPFRAG_SECRET_INTERVAL,
.procname = "ipfrag_secret_interval",
@@ -646,22 +651,20 @@ static struct ctl_table ip4_frags_ctl_table[] = {
{ }
};
-static int ip4_frags_ctl_register(struct net *net)
+static int ip4_frags_ns_ctl_register(struct net *net)
{
struct ctl_table *table;
struct ctl_table_header *hdr;
- table = ip4_frags_ctl_table;
+ table = ip4_frags_ns_ctl_table;
if (net != &init_net) {
- table = kmemdup(table, sizeof(ip4_frags_ctl_table), GFP_KERNEL);
+ table = kmemdup(table, sizeof(ip4_frags_ns_ctl_table), GFP_KERNEL);
if (table == NULL)
goto err_alloc;
table[0].data = &net->ipv4.frags.high_thresh;
table[1].data = &net->ipv4.frags.low_thresh;
table[2].data = &net->ipv4.frags.timeout;
- table[3].mode &= ~0222;
- table[4].mode &= ~0222;
}
hdr = register_net_sysctl_table(net, net_ipv4_ctl_path, table);
@@ -678,7 +681,7 @@ err_alloc:
return -ENOMEM;
}
-static void ip4_frags_ctl_unregister(struct net *net)
+static void ip4_frags_ns_ctl_unregister(struct net *net)
{
struct ctl_table *table;
@@ -686,13 +689,22 @@ static void ip4_frags_ctl_unregister(struct net *net)
unregister_net_sysctl_table(net->ipv4.frags_hdr);
kfree(table);
}
+
+static void ip4_frags_ctl_register(void)
+{
+ register_net_sysctl_rotable(net_ipv4_ctl_path, ip4_frags_ctl_table);
+}
#else
-static inline int ip4_frags_ctl_register(struct net *net)
+static inline int ip4_frags_ns_ctl_register(struct net *net)
{
return 0;
}
-static inline void ip4_frags_ctl_unregister(struct net *net)
+static inline void ip4_frags_ns_ctl_unregister(struct net *net)
+{
+}
+
+static inline void ip4_frags_ctl_register(void)
{
}
#endif
@@ -716,12 +728,12 @@ static int ipv4_frags_init_net(struct net *net)
inet_frags_init_net(&net->ipv4.frags);
- return ip4_frags_ctl_register(net);
+ return ip4_frags_ns_ctl_register(net);
}
static void ipv4_frags_exit_net(struct net *net)
{
- ip4_frags_ctl_unregister(net);
+ ip4_frags_ns_ctl_unregister(net);
inet_frags_exit_net(&net->ipv4.frags, &ip4_frags);
}
@@ -732,6 +744,7 @@ static struct pernet_operations ip4_frags_ops = {
void __init ipfrag_init(void)
{
+ ip4_frags_ctl_register();
register_pernet_subsys(&ip4_frags_ops);
ip4_frags.hashfn = ip4_hashfn;
ip4_frags.constructor = ip4_frag_init;
diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c
index 4342cba4ff82..2a61158ea722 100644
--- a/net/ipv4/ip_gre.c
+++ b/net/ipv4/ip_gre.c
@@ -473,6 +473,8 @@ static int ipgre_rcv(struct sk_buff *skb)
read_lock(&ipgre_lock);
if ((tunnel = ipgre_tunnel_lookup(dev_net(skb->dev),
iph->saddr, iph->daddr, key)) != NULL) {
+ struct net_device_stats *stats = &tunnel->dev->stats;
+
secpath_reset(skb);
skb->protocol = *(__be16*)(h + 2);
@@ -497,28 +499,28 @@ static int ipgre_rcv(struct sk_buff *skb)
/* Looped back packet, drop it! */
if (skb->rtable->fl.iif == 0)
goto drop;
- tunnel->stat.multicast++;
+ stats->multicast++;
skb->pkt_type = PACKET_BROADCAST;
}
#endif
if (((flags&GRE_CSUM) && csum) ||
(!(flags&GRE_CSUM) && tunnel->parms.i_flags&GRE_CSUM)) {
- tunnel->stat.rx_crc_errors++;
- tunnel->stat.rx_errors++;
+ stats->rx_crc_errors++;
+ stats->rx_errors++;
goto drop;
}
if (tunnel->parms.i_flags&GRE_SEQ) {
if (!(flags&GRE_SEQ) ||
(tunnel->i_seqno && (s32)(seqno - tunnel->i_seqno) < 0)) {
- tunnel->stat.rx_fifo_errors++;
- tunnel->stat.rx_errors++;
+ stats->rx_fifo_errors++;
+ stats->rx_errors++;
goto drop;
}
tunnel->i_seqno = seqno + 1;
}
- tunnel->stat.rx_packets++;
- tunnel->stat.rx_bytes += skb->len;
+ stats->rx_packets++;
+ stats->rx_bytes += skb->len;
skb->dev = tunnel->dev;
dst_release(skb->dst);
skb->dst = NULL;
@@ -540,7 +542,7 @@ drop_nolock:
static int ipgre_tunnel_xmit(struct sk_buff *skb, struct net_device *dev)
{
struct ip_tunnel *tunnel = netdev_priv(dev);
- struct net_device_stats *stats = &tunnel->stat;
+ struct net_device_stats *stats = &tunnel->dev->stats;
struct iphdr *old_iph = ip_hdr(skb);
struct iphdr *tiph;
u8 tos;
@@ -554,7 +556,7 @@ static int ipgre_tunnel_xmit(struct sk_buff *skb, struct net_device *dev)
int mtu;
if (tunnel->recursion++) {
- tunnel->stat.collisions++;
+ stats->collisions++;
goto tx_error;
}
@@ -570,7 +572,7 @@ static int ipgre_tunnel_xmit(struct sk_buff *skb, struct net_device *dev)
/* NBMA tunnel */
if (skb->dst == NULL) {
- tunnel->stat.tx_fifo_errors++;
+ stats->tx_fifo_errors++;
goto tx_error;
}
@@ -621,7 +623,7 @@ static int ipgre_tunnel_xmit(struct sk_buff *skb, struct net_device *dev)
.tos = RT_TOS(tos) } },
.proto = IPPROTO_GRE };
if (ip_route_output_key(dev_net(dev), &rt, &fl)) {
- tunnel->stat.tx_carrier_errors++;
+ stats->tx_carrier_errors++;
goto tx_error;
}
}
@@ -629,7 +631,7 @@ static int ipgre_tunnel_xmit(struct sk_buff *skb, struct net_device *dev)
if (tdev == dev) {
ip_rt_put(rt);
- tunnel->stat.collisions++;
+ stats->collisions++;
goto tx_error;
}
@@ -954,11 +956,6 @@ done:
return err;
}
-static struct net_device_stats *ipgre_tunnel_get_stats(struct net_device *dev)
-{
- return &(((struct ip_tunnel*)netdev_priv(dev))->stat);
-}
-
static int ipgre_tunnel_change_mtu(struct net_device *dev, int new_mtu)
{
struct ip_tunnel *tunnel = netdev_priv(dev);
@@ -1084,7 +1081,6 @@ static void ipgre_tunnel_setup(struct net_device *dev)
dev->uninit = ipgre_tunnel_uninit;
dev->destructor = free_netdev;
dev->hard_start_xmit = ipgre_tunnel_xmit;
- dev->get_stats = ipgre_tunnel_get_stats;
dev->do_ioctl = ipgre_tunnel_ioctl;
dev->change_mtu = ipgre_tunnel_change_mtu;
diff --git a/net/ipv4/ip_input.c b/net/ipv4/ip_input.c
index ff77a4a7f9ec..e0bed56c51f1 100644
--- a/net/ipv4/ip_input.c
+++ b/net/ipv4/ip_input.c
@@ -5,8 +5,6 @@
*
* The Internet Protocol (IP) module.
*
- * Version: $Id: ip_input.c,v 1.55 2002/01/12 07:39:45 davem Exp $
- *
* Authors: Ross Biro
* Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
* Donald Becker, <becker@super.org>
@@ -147,12 +145,6 @@
#include <linux/netlink.h>
/*
- * SNMP management statistics
- */
-
-DEFINE_SNMP_STAT(struct ipstats_mib, ip_statistics) __read_mostly;
-
-/*
* Process Router Attention IP option
*/
int ip_call_ra_chain(struct sk_buff *skb)
@@ -232,16 +224,16 @@ static int ip_local_deliver_finish(struct sk_buff *skb)
protocol = -ret;
goto resubmit;
}
- IP_INC_STATS_BH(IPSTATS_MIB_INDELIVERS);
+ IP_INC_STATS_BH(net, IPSTATS_MIB_INDELIVERS);
} else {
if (!raw) {
if (xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb)) {
- IP_INC_STATS_BH(IPSTATS_MIB_INUNKNOWNPROTOS);
+ IP_INC_STATS_BH(net, IPSTATS_MIB_INUNKNOWNPROTOS);
icmp_send(skb, ICMP_DEST_UNREACH,
ICMP_PROT_UNREACH, 0);
}
} else
- IP_INC_STATS_BH(IPSTATS_MIB_INDELIVERS);
+ IP_INC_STATS_BH(net, IPSTATS_MIB_INDELIVERS);
kfree_skb(skb);
}
}
@@ -283,7 +275,7 @@ static inline int ip_rcv_options(struct sk_buff *skb)
--ANK (980813)
*/
if (skb_cow(skb, skb_headroom(skb))) {
- IP_INC_STATS_BH(IPSTATS_MIB_INDISCARDS);
+ IP_INC_STATS_BH(dev_net(dev), IPSTATS_MIB_INDISCARDS);
goto drop;
}
@@ -292,7 +284,7 @@ static inline int ip_rcv_options(struct sk_buff *skb)
opt->optlen = iph->ihl*4 - sizeof(struct iphdr);
if (ip_options_compile(dev_net(dev), opt, skb)) {
- IP_INC_STATS_BH(IPSTATS_MIB_INHDRERRORS);
+ IP_INC_STATS_BH(dev_net(dev), IPSTATS_MIB_INHDRERRORS);
goto drop;
}
@@ -336,9 +328,11 @@ static int ip_rcv_finish(struct sk_buff *skb)
skb->dev);
if (unlikely(err)) {
if (err == -EHOSTUNREACH)
- IP_INC_STATS_BH(IPSTATS_MIB_INADDRERRORS);
+ IP_INC_STATS_BH(dev_net(skb->dev),
+ IPSTATS_MIB_INADDRERRORS);
else if (err == -ENETUNREACH)
- IP_INC_STATS_BH(IPSTATS_MIB_INNOROUTES);
+ IP_INC_STATS_BH(dev_net(skb->dev),
+ IPSTATS_MIB_INNOROUTES);
goto drop;
}
}
@@ -359,9 +353,9 @@ static int ip_rcv_finish(struct sk_buff *skb)
rt = skb->rtable;
if (rt->rt_type == RTN_MULTICAST)
- IP_INC_STATS_BH(IPSTATS_MIB_INMCASTPKTS);
+ IP_INC_STATS_BH(dev_net(rt->u.dst.dev), IPSTATS_MIB_INMCASTPKTS);
else if (rt->rt_type == RTN_BROADCAST)
- IP_INC_STATS_BH(IPSTATS_MIB_INBCASTPKTS);
+ IP_INC_STATS_BH(dev_net(rt->u.dst.dev), IPSTATS_MIB_INBCASTPKTS);
return dst_input(skb);
@@ -384,10 +378,10 @@ int ip_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt,
if (skb->pkt_type == PACKET_OTHERHOST)
goto drop;
- IP_INC_STATS_BH(IPSTATS_MIB_INRECEIVES);
+ IP_INC_STATS_BH(dev_net(dev), IPSTATS_MIB_INRECEIVES);
if ((skb = skb_share_check(skb, GFP_ATOMIC)) == NULL) {
- IP_INC_STATS_BH(IPSTATS_MIB_INDISCARDS);
+ IP_INC_STATS_BH(dev_net(dev), IPSTATS_MIB_INDISCARDS);
goto out;
}
@@ -420,7 +414,7 @@ int ip_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt,
len = ntohs(iph->tot_len);
if (skb->len < len) {
- IP_INC_STATS_BH(IPSTATS_MIB_INTRUNCATEDPKTS);
+ IP_INC_STATS_BH(dev_net(dev), IPSTATS_MIB_INTRUNCATEDPKTS);
goto drop;
} else if (len < (iph->ihl*4))
goto inhdr_error;
@@ -430,7 +424,7 @@ int ip_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt,
* Note this now means skb->len holds ntohs(iph->tot_len).
*/
if (pskb_trim_rcsum(skb, len)) {
- IP_INC_STATS_BH(IPSTATS_MIB_INDISCARDS);
+ IP_INC_STATS_BH(dev_net(dev), IPSTATS_MIB_INDISCARDS);
goto drop;
}
@@ -441,11 +435,9 @@ int ip_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt,
ip_rcv_finish);
inhdr_error:
- IP_INC_STATS_BH(IPSTATS_MIB_INHDRERRORS);
+ IP_INC_STATS_BH(dev_net(dev), IPSTATS_MIB_INHDRERRORS);
drop:
kfree_skb(skb);
out:
return NET_RX_DROP;
}
-
-EXPORT_SYMBOL(ip_statistics);
diff --git a/net/ipv4/ip_options.c b/net/ipv4/ip_options.c
index 33126ad2cfdc..be3f18a7a40e 100644
--- a/net/ipv4/ip_options.c
+++ b/net/ipv4/ip_options.c
@@ -5,8 +5,6 @@
*
* The options processing module for ip.c
*
- * Version: $Id: ip_options.c,v 1.21 2001/09/01 00:31:50 davem Exp $
- *
* Authors: A.N.Kuznetsov
*
*/
diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c
index e527628f56cf..465544f6281a 100644
--- a/net/ipv4/ip_output.c
+++ b/net/ipv4/ip_output.c
@@ -5,8 +5,6 @@
*
* The Internet Protocol (IP) output module.
*
- * Version: $Id: ip_output.c,v 1.100 2002/02/01 22:01:03 davem Exp $
- *
* Authors: Ross Biro
* Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
* Donald Becker, <becker@super.org>
@@ -184,9 +182,9 @@ static inline int ip_finish_output2(struct sk_buff *skb)
unsigned int hh_len = LL_RESERVED_SPACE(dev);
if (rt->rt_type == RTN_MULTICAST)
- IP_INC_STATS(IPSTATS_MIB_OUTMCASTPKTS);
+ IP_INC_STATS(dev_net(dev), IPSTATS_MIB_OUTMCASTPKTS);
else if (rt->rt_type == RTN_BROADCAST)
- IP_INC_STATS(IPSTATS_MIB_OUTBCASTPKTS);
+ IP_INC_STATS(dev_net(dev), IPSTATS_MIB_OUTBCASTPKTS);
/* Be paranoid, rather than too clever. */
if (unlikely(skb_headroom(skb) < hh_len && dev->header_ops)) {
@@ -246,7 +244,7 @@ int ip_mc_output(struct sk_buff *skb)
/*
* If the indicated interface is up and running, send the packet.
*/
- IP_INC_STATS(IPSTATS_MIB_OUTREQUESTS);
+ IP_INC_STATS(dev_net(dev), IPSTATS_MIB_OUTREQUESTS);
skb->dev = dev;
skb->protocol = htons(ETH_P_IP);
@@ -300,7 +298,7 @@ int ip_output(struct sk_buff *skb)
{
struct net_device *dev = skb->dst->dev;
- IP_INC_STATS(IPSTATS_MIB_OUTREQUESTS);
+ IP_INC_STATS(dev_net(dev), IPSTATS_MIB_OUTREQUESTS);
skb->dev = dev;
skb->protocol = htons(ETH_P_IP);
@@ -391,7 +389,7 @@ packet_routed:
return ip_local_out(skb);
no_route:
- IP_INC_STATS(IPSTATS_MIB_OUTNOROUTES);
+ IP_INC_STATS(sock_net(sk), IPSTATS_MIB_OUTNOROUTES);
kfree_skb(skb);
return -EHOSTUNREACH;
}
@@ -453,7 +451,7 @@ int ip_fragment(struct sk_buff *skb, int (*output)(struct sk_buff*))
iph = ip_hdr(skb);
if (unlikely((iph->frag_off & htons(IP_DF)) && !skb->local_df)) {
- IP_INC_STATS(IPSTATS_MIB_FRAGFAILS);
+ IP_INC_STATS(dev_net(dev), IPSTATS_MIB_FRAGFAILS);
icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED,
htonl(ip_skb_dst_mtu(skb)));
kfree_skb(skb);
@@ -544,7 +542,7 @@ int ip_fragment(struct sk_buff *skb, int (*output)(struct sk_buff*))
err = output(skb);
if (!err)
- IP_INC_STATS(IPSTATS_MIB_FRAGCREATES);
+ IP_INC_STATS(dev_net(dev), IPSTATS_MIB_FRAGCREATES);
if (err || !frag)
break;
@@ -554,7 +552,7 @@ int ip_fragment(struct sk_buff *skb, int (*output)(struct sk_buff*))
}
if (err == 0) {
- IP_INC_STATS(IPSTATS_MIB_FRAGOKS);
+ IP_INC_STATS(dev_net(dev), IPSTATS_MIB_FRAGOKS);
return 0;
}
@@ -563,7 +561,7 @@ int ip_fragment(struct sk_buff *skb, int (*output)(struct sk_buff*))
kfree_skb(frag);
frag = skb;
}
- IP_INC_STATS(IPSTATS_MIB_FRAGFAILS);
+ IP_INC_STATS(dev_net(dev), IPSTATS_MIB_FRAGFAILS);
return err;
}
@@ -675,15 +673,15 @@ slow_path:
if (err)
goto fail;
- IP_INC_STATS(IPSTATS_MIB_FRAGCREATES);
+ IP_INC_STATS(dev_net(dev), IPSTATS_MIB_FRAGCREATES);
}
kfree_skb(skb);
- IP_INC_STATS(IPSTATS_MIB_FRAGOKS);
+ IP_INC_STATS(dev_net(dev), IPSTATS_MIB_FRAGOKS);
return err;
fail:
kfree_skb(skb);
- IP_INC_STATS(IPSTATS_MIB_FRAGFAILS);
+ IP_INC_STATS(dev_net(dev), IPSTATS_MIB_FRAGFAILS);
return err;
}
@@ -1049,7 +1047,7 @@ alloc_new_skb:
error:
inet->cork.length -= length;
- IP_INC_STATS(IPSTATS_MIB_OUTDISCARDS);
+ IP_INC_STATS(sock_net(sk), IPSTATS_MIB_OUTDISCARDS);
return err;
}
@@ -1191,7 +1189,7 @@ ssize_t ip_append_page(struct sock *sk, struct page *page,
error:
inet->cork.length -= size;
- IP_INC_STATS(IPSTATS_MIB_OUTDISCARDS);
+ IP_INC_STATS(sock_net(sk), IPSTATS_MIB_OUTDISCARDS);
return err;
}
@@ -1213,6 +1211,7 @@ int ip_push_pending_frames(struct sock *sk)
struct sk_buff *skb, *tmp_skb;
struct sk_buff **tail_skb;
struct inet_sock *inet = inet_sk(sk);
+ struct net *net = sock_net(sk);
struct ip_options *opt = NULL;
struct rtable *rt = (struct rtable *)inet->cork.dst;
struct iphdr *iph;
@@ -1282,7 +1281,7 @@ int ip_push_pending_frames(struct sock *sk)
skb->dst = dst_clone(&rt->u.dst);
if (iph->protocol == IPPROTO_ICMP)
- icmp_out_count(((struct icmphdr *)
+ icmp_out_count(net, ((struct icmphdr *)
skb_transport_header(skb))->type);
/* Netfilter gets whole the not fragmented skb. */
@@ -1299,7 +1298,7 @@ out:
return err;
error:
- IP_INC_STATS(IPSTATS_MIB_OUTDISCARDS);
+ IP_INC_STATS(net, IPSTATS_MIB_OUTDISCARDS);
goto out;
}
diff --git a/net/ipv4/ip_sockglue.c b/net/ipv4/ip_sockglue.c
index e0514e82308e..105d92a039b9 100644
--- a/net/ipv4/ip_sockglue.c
+++ b/net/ipv4/ip_sockglue.c
@@ -5,8 +5,6 @@
*
* The IP to API glue.
*
- * Version: $Id: ip_sockglue.c,v 1.62 2002/02/01 22:01:04 davem Exp $
- *
* Authors: see ip.c
*
* Fixes:
diff --git a/net/ipv4/ipconfig.c b/net/ipv4/ipconfig.c
index ed45037ce9be..b88aa9afa42e 100644
--- a/net/ipv4/ipconfig.c
+++ b/net/ipv4/ipconfig.c
@@ -1,6 +1,4 @@
/*
- * $Id: ipconfig.c,v 1.46 2002/02/01 22:01:04 davem Exp $
- *
* Automatic Configuration of IP -- use DHCP, BOOTP, RARP, or
* user-supplied information to configure own IP address and routes.
*
diff --git a/net/ipv4/ipip.c b/net/ipv4/ipip.c
index af5cb53da5cc..4c6d2caf9203 100644
--- a/net/ipv4/ipip.c
+++ b/net/ipv4/ipip.c
@@ -1,8 +1,6 @@
/*
* Linux NET3: IP/IP protocol decoder.
*
- * Version: $Id: ipip.c,v 1.50 2001/10/02 02:22:36 davem Exp $
- *
* Authors:
* Sam Lantinga (slouken@cs.ucdavis.edu) 02/01/95
*
@@ -368,8 +366,8 @@ static int ipip_rcv(struct sk_buff *skb)
skb->protocol = htons(ETH_P_IP);
skb->pkt_type = PACKET_HOST;
- tunnel->stat.rx_packets++;
- tunnel->stat.rx_bytes += skb->len;
+ tunnel->dev->stats.rx_packets++;
+ tunnel->dev->stats.rx_bytes += skb->len;
skb->dev = tunnel->dev;
dst_release(skb->dst);
skb->dst = NULL;
@@ -392,7 +390,7 @@ static int ipip_rcv(struct sk_buff *skb)
static int ipip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev)
{
struct ip_tunnel *tunnel = netdev_priv(dev);
- struct net_device_stats *stats = &tunnel->stat;
+ struct net_device_stats *stats = &tunnel->dev->stats;
struct iphdr *tiph = &tunnel->parms.iph;
u8 tos = tunnel->parms.iph.tos;
__be16 df = tiph->frag_off;
@@ -405,7 +403,7 @@ static int ipip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev)
int mtu;
if (tunnel->recursion++) {
- tunnel->stat.collisions++;
+ stats->collisions++;
goto tx_error;
}
@@ -418,7 +416,7 @@ static int ipip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev)
if (!dst) {
/* NBMA tunnel */
if ((rt = skb->rtable) == NULL) {
- tunnel->stat.tx_fifo_errors++;
+ stats->tx_fifo_errors++;
goto tx_error;
}
if ((dst = rt->rt_gateway) == 0)
@@ -433,7 +431,7 @@ static int ipip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev)
.tos = RT_TOS(tos) } },
.proto = IPPROTO_IPIP };
if (ip_route_output_key(dev_net(dev), &rt, &fl)) {
- tunnel->stat.tx_carrier_errors++;
+ stats->tx_carrier_errors++;
goto tx_error_icmp;
}
}
@@ -441,7 +439,7 @@ static int ipip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev)
if (tdev == dev) {
ip_rt_put(rt);
- tunnel->stat.collisions++;
+ stats->collisions++;
goto tx_error;
}
@@ -451,7 +449,7 @@ static int ipip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev)
mtu = skb->dst ? dst_mtu(skb->dst) : dev->mtu;
if (mtu < 68) {
- tunnel->stat.collisions++;
+ stats->collisions++;
ip_rt_put(rt);
goto tx_error;
}
@@ -685,11 +683,6 @@ done:
return err;
}
-static struct net_device_stats *ipip_tunnel_get_stats(struct net_device *dev)
-{
- return &(((struct ip_tunnel*)netdev_priv(dev))->stat);
-}
-
static int ipip_tunnel_change_mtu(struct net_device *dev, int new_mtu)
{
if (new_mtu < 68 || new_mtu > 0xFFF8 - sizeof(struct iphdr))
@@ -702,7 +695,6 @@ static void ipip_tunnel_setup(struct net_device *dev)
{
dev->uninit = ipip_tunnel_uninit;
dev->hard_start_xmit = ipip_tunnel_xmit;
- dev->get_stats = ipip_tunnel_get_stats;
dev->do_ioctl = ipip_tunnel_ioctl;
dev->change_mtu = ipip_tunnel_change_mtu;
dev->destructor = free_netdev;
diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c
index 11700a4dcd95..033c712c3a5d 100644
--- a/net/ipv4/ipmr.c
+++ b/net/ipv4/ipmr.c
@@ -9,8 +9,6 @@
* as published by the Free Software Foundation; either version
* 2 of the License, or (at your option) any later version.
*
- * Version: $Id: ipmr.c,v 1.65 2001/10/31 21:55:54 davem Exp $
- *
* Fixes:
* Michael Chastain : Incorrect size of copying.
* Alan Cox : Added the cache manager code
@@ -120,6 +118,31 @@ static struct timer_list ipmr_expire_timer;
/* Service routines creating virtual interfaces: DVMRP tunnels and PIMREG */
+static void ipmr_del_tunnel(struct net_device *dev, struct vifctl *v)
+{
+ dev_close(dev);
+ /* Then issue SIOCDELTUNNEL for "dvmrp<vifc_vifi>" through the "tunl0" device. */
+ dev = __dev_get_by_name(&init_net, "tunl0");
+ if (dev) {
+ struct ifreq ifr;
+ mm_segment_t oldfs;
+ struct ip_tunnel_parm p;
+ /* Describe the tunnel by the vif's remote/local addresses and its name. */
+ memset(&p, 0, sizeof(p));
+ p.iph.daddr = v->vifc_rmt_addr.s_addr;
+ p.iph.saddr = v->vifc_lcl_addr.s_addr;
+ p.iph.version = 4;
+ p.iph.ihl = 5;
+ p.iph.protocol = IPPROTO_IPIP;
+ sprintf(p.name, "dvmrp%d", v->vifc_vifi);
+ ifr.ifr_ifru.ifru_data = (__force void __user *)&p; /* kernel buffer cast to a __user pointer */
+
+ oldfs = get_fs(); set_fs(KERNEL_DS); /* NOTE(review): presumably so the ioctl accepts &p - confirm */
+ dev->do_ioctl(dev, &ifr, SIOCDELTUNNEL); /* return value is not checked */
+ set_fs(oldfs);
+ }
+}
+
static
struct net_device *ipmr_new_tunnel(struct vifctl *v)
{
@@ -161,6 +184,7 @@ struct net_device *ipmr_new_tunnel(struct vifctl *v)
if (dev_open(dev))
goto failure;
+ dev_hold(dev);
}
}
return dev;
@@ -181,26 +205,20 @@ static int reg_vif_num = -1;
static int reg_vif_xmit(struct sk_buff *skb, struct net_device *dev)
{
read_lock(&mrt_lock);
- ((struct net_device_stats*)netdev_priv(dev))->tx_bytes += skb->len;
- ((struct net_device_stats*)netdev_priv(dev))->tx_packets++;
+ dev->stats.tx_bytes += skb->len;
+ dev->stats.tx_packets++;
ipmr_cache_report(skb, reg_vif_num, IGMPMSG_WHOLEPKT);
read_unlock(&mrt_lock);
kfree_skb(skb);
return 0;
}
-static struct net_device_stats *reg_vif_get_stats(struct net_device *dev)
-{
- return (struct net_device_stats*)netdev_priv(dev);
-}
-
static void reg_vif_setup(struct net_device *dev)
{
dev->type = ARPHRD_PIMREG;
dev->mtu = ETH_DATA_LEN - sizeof(struct iphdr) - 8;
dev->flags = IFF_NOARP;
dev->hard_start_xmit = reg_vif_xmit;
- dev->get_stats = reg_vif_get_stats;
dev->destructor = free_netdev;
}
@@ -209,8 +227,7 @@ static struct net_device *ipmr_reg_vif(void)
struct net_device *dev;
struct in_device *in_dev;
- dev = alloc_netdev(sizeof(struct net_device_stats), "pimreg",
- reg_vif_setup);
+ dev = alloc_netdev(0, "pimreg", reg_vif_setup);
if (dev == NULL)
return NULL;
@@ -234,6 +251,8 @@ static struct net_device *ipmr_reg_vif(void)
if (dev_open(dev))
goto failure;
+ dev_hold(dev);
+
return dev;
failure:
@@ -248,9 +267,10 @@ failure:
/*
* Delete a VIF entry
+ * @notify: Set to 1 if the caller is a notifier_call; unregister_netdevice() is then skipped
*/
-static int vif_delete(int vifi)
+static int vif_delete(int vifi, int notify)
{
struct vif_device *v;
struct net_device *dev;
@@ -293,7 +313,7 @@ static int vif_delete(int vifi)
ip_rt_multicast_event(in_dev);
}
- if (v->flags&(VIFF_TUNNEL|VIFF_REGISTER))
+ if (v->flags&(VIFF_TUNNEL|VIFF_REGISTER) && !notify)
unregister_netdevice(dev);
dev_put(dev);
@@ -398,6 +418,7 @@ static int vif_add(struct vifctl *vifc, int mrtsock)
struct vif_device *v = &vif_table[vifi];
struct net_device *dev;
struct in_device *in_dev;
+ int err;
/* Is vif busy ? */
if (VIF_EXISTS(vifi))
@@ -415,18 +436,34 @@ static int vif_add(struct vifctl *vifc, int mrtsock)
dev = ipmr_reg_vif();
if (!dev)
return -ENOBUFS;
+ err = dev_set_allmulti(dev, 1);
+ if (err) {
+ unregister_netdevice(dev);
+ dev_put(dev);
+ return err;
+ }
break;
#endif
case VIFF_TUNNEL:
dev = ipmr_new_tunnel(vifc);
if (!dev)
return -ENOBUFS;
+ err = dev_set_allmulti(dev, 1);
+ if (err) {
+ ipmr_del_tunnel(dev, vifc);
+ dev_put(dev);
+ return err;
+ }
break;
case 0:
dev = ip_dev_find(&init_net, vifc->vifc_lcl_addr.s_addr);
if (!dev)
return -EADDRNOTAVAIL;
- dev_put(dev);
+ err = dev_set_allmulti(dev, 1);
+ if (err) {
+ dev_put(dev);
+ return err;
+ }
break;
default:
return -EINVAL;
@@ -435,7 +472,6 @@ static int vif_add(struct vifctl *vifc, int mrtsock)
if ((in_dev = __in_dev_get_rtnl(dev)) == NULL)
return -EADDRNOTAVAIL;
IPV4_DEVCONF(in_dev->cnf, MC_FORWARDING)++;
- dev_set_allmulti(dev, +1);
ip_rt_multicast_event(in_dev);
/*
@@ -458,7 +494,6 @@ static int vif_add(struct vifctl *vifc, int mrtsock)
/* And finish update writing critical data */
write_lock_bh(&mrt_lock);
- dev_hold(dev);
v->dev=dev;
#ifdef CONFIG_IP_PIMSM
if (v->flags&VIFF_REGISTER)
@@ -805,7 +840,7 @@ static void mroute_clean_tables(struct sock *sk)
*/
for (i=0; i<maxvif; i++) {
if (!(vif_table[i].flags&VIFF_STATIC))
- vif_delete(i);
+ vif_delete(i, 0);
}
/*
@@ -918,7 +953,7 @@ int ip_mroute_setsockopt(struct sock *sk,int optname,char __user *optval,int opt
if (optname==MRT_ADD_VIF) {
ret = vif_add(&vif, sk==mroute_socket);
} else {
- ret = vif_delete(vif.vifc_vifi);
+ ret = vif_delete(vif.vifc_vifi, 0);
}
rtnl_unlock();
return ret;
@@ -1097,7 +1132,7 @@ static int ipmr_device_event(struct notifier_block *this, unsigned long event, v
v=&vif_table[0];
for (ct=0;ct<maxvif;ct++,v++) {
if (v->dev==dev)
- vif_delete(ct);
+ vif_delete(ct, 1);
}
return NOTIFY_DONE;
}
@@ -1143,7 +1178,7 @@ static inline int ipmr_forward_finish(struct sk_buff *skb)
{
struct ip_options * opt = &(IPCB(skb)->opt);
- IP_INC_STATS_BH(IPSTATS_MIB_OUTFORWDATAGRAMS);
+ IP_INC_STATS_BH(dev_net(skb->dst->dev), IPSTATS_MIB_OUTFORWDATAGRAMS);
if (unlikely(opt->optlen))
ip_forward_options(skb);
@@ -1170,8 +1205,8 @@ static void ipmr_queue_xmit(struct sk_buff *skb, struct mfc_cache *c, int vifi)
if (vif->flags & VIFF_REGISTER) {
vif->pkt_out++;
vif->bytes_out+=skb->len;
- ((struct net_device_stats*)netdev_priv(vif->dev))->tx_bytes += skb->len;
- ((struct net_device_stats*)netdev_priv(vif->dev))->tx_packets++;
+ vif->dev->stats.tx_bytes += skb->len;
+ vif->dev->stats.tx_packets++;
ipmr_cache_report(skb, vifi, IGMPMSG_WHOLEPKT);
kfree_skb(skb);
return;
@@ -1206,7 +1241,7 @@ static void ipmr_queue_xmit(struct sk_buff *skb, struct mfc_cache *c, int vifi)
to blackhole.
*/
- IP_INC_STATS_BH(IPSTATS_MIB_FRAGFAILS);
+ IP_INC_STATS_BH(dev_net(dev), IPSTATS_MIB_FRAGFAILS);
ip_rt_put(rt);
goto out_free;
}
@@ -1230,8 +1265,8 @@ static void ipmr_queue_xmit(struct sk_buff *skb, struct mfc_cache *c, int vifi)
if (vif->flags & VIFF_TUNNEL) {
ip_encap(skb, vif->local, vif->remote);
/* FIXME: extra output firewall step used to be here. --RR */
- ((struct ip_tunnel *)netdev_priv(vif->dev))->stat.tx_packets++;
- ((struct ip_tunnel *)netdev_priv(vif->dev))->stat.tx_bytes+=skb->len;
+ vif->dev->stats.tx_packets++;
+ vif->dev->stats.tx_bytes += skb->len;
}
IPCB(skb)->flags |= IPSKB_FORWARDED;
@@ -1487,8 +1522,8 @@ int pim_rcv_v1(struct sk_buff * skb)
skb->pkt_type = PACKET_HOST;
dst_release(skb->dst);
skb->dst = NULL;
- ((struct net_device_stats*)netdev_priv(reg_dev))->rx_bytes += skb->len;
- ((struct net_device_stats*)netdev_priv(reg_dev))->rx_packets++;
+ reg_dev->stats.rx_bytes += skb->len;
+ reg_dev->stats.rx_packets++;
nf_reset(skb);
netif_rx(skb);
dev_put(reg_dev);
@@ -1542,8 +1577,8 @@ static int pim_rcv(struct sk_buff * skb)
skb->ip_summed = 0;
skb->pkt_type = PACKET_HOST;
dst_release(skb->dst);
- ((struct net_device_stats*)netdev_priv(reg_dev))->rx_bytes += skb->len;
- ((struct net_device_stats*)netdev_priv(reg_dev))->rx_packets++;
+ reg_dev->stats.rx_bytes += skb->len;
+ reg_dev->stats.rx_packets++;
skb->dst = NULL;
nf_reset(skb);
netif_rx(skb);
@@ -1887,16 +1922,36 @@ static struct net_protocol pim_protocol = {
* Setup for IP multicast routing
*/
-void __init ip_mr_init(void)
+int __init ip_mr_init(void) /* returns 0 on success, negative errno on failure */
{
+ int err;
+
mrt_cachep = kmem_cache_create("ip_mrt_cache",
sizeof(struct mfc_cache),
0, SLAB_HWCACHE_ALIGN|SLAB_PANIC,
NULL);
+ if (!mrt_cachep)
+ return -ENOMEM;
+
setup_timer(&ipmr_expire_timer, ipmr_expire_process, 0);
- register_netdevice_notifier(&ip_mr_notifier);
+ err = register_netdevice_notifier(&ip_mr_notifier);
+ if (err)
+ goto reg_notif_fail;
#ifdef CONFIG_PROC_FS
- proc_net_fops_create(&init_net, "ip_mr_vif", 0, &ipmr_vif_fops);
- proc_net_fops_create(&init_net, "ip_mr_cache", 0, &ipmr_mfc_fops);
+ err = -ENOMEM;
+ if (!proc_net_fops_create(&init_net, "ip_mr_vif", 0, &ipmr_vif_fops))
+ goto proc_vif_fail;
+ if (!proc_net_fops_create(&init_net, "ip_mr_cache", 0, &ipmr_mfc_fops))
+ goto proc_cache_fail;
#endif
+ return 0;
+#ifdef CONFIG_PROC_FS
+proc_cache_fail: /* "ip_mr_cache" failed: "ip_mr_vif" exists, remove it */
+ proc_net_remove(&init_net, "ip_mr_vif");
+proc_vif_fail: /* "ip_mr_vif" failed: the notifier is registered, drop it */
+ unregister_netdevice_notifier(&ip_mr_notifier);
+#endif
+reg_notif_fail: /* unwind in reverse order of setup; the cache is always last */
+ kmem_cache_destroy(mrt_cachep);
+ return err;
}
diff --git a/net/ipv4/ipvs/ip_vs_app.c b/net/ipv4/ipvs/ip_vs_app.c
index 535abe0c45e7..1f1897a1a702 100644
--- a/net/ipv4/ipvs/ip_vs_app.c
+++ b/net/ipv4/ipvs/ip_vs_app.c
@@ -1,8 +1,6 @@
/*
* ip_vs_app.c: Application module support for IPVS
*
- * Version: $Id: ip_vs_app.c,v 1.17 2003/03/22 06:31:21 wensong Exp $
- *
* Authors: Wensong Zhang <wensong@linuxvirtualserver.org>
*
* This program is free software; you can redistribute it and/or
diff --git a/net/ipv4/ipvs/ip_vs_conn.c b/net/ipv4/ipvs/ip_vs_conn.c
index 65f1ba112752..f8bdae47a77f 100644
--- a/net/ipv4/ipvs/ip_vs_conn.c
+++ b/net/ipv4/ipvs/ip_vs_conn.c
@@ -5,8 +5,6 @@
* high-performance and highly available server based on a
* cluster of servers.
*
- * Version: $Id: ip_vs_conn.c,v 1.31 2003/04/18 09:03:16 wensong Exp $
- *
* Authors: Wensong Zhang <wensong@linuxvirtualserver.org>
* Peter Kese <peter.kese@ijs.si>
* Julian Anastasov <ja@ssi.bg>
diff --git a/net/ipv4/ipvs/ip_vs_core.c b/net/ipv4/ipvs/ip_vs_core.c
index 963981a9d501..a7879eafc3b5 100644
--- a/net/ipv4/ipvs/ip_vs_core.c
+++ b/net/ipv4/ipvs/ip_vs_core.c
@@ -5,8 +5,6 @@
* high-performance and highly available server based on a
* cluster of servers.
*
- * Version: $Id: ip_vs_core.c,v 1.34 2003/05/10 03:05:23 wensong Exp $
- *
* Authors: Wensong Zhang <wensong@linuxvirtualserver.org>
* Peter Kese <peter.kese@ijs.si>
* Julian Anastasov <ja@ssi.bg>
@@ -993,7 +991,8 @@ ip_vs_in(unsigned int hooknum, struct sk_buff *skb,
== sysctl_ip_vs_sync_threshold[0])) ||
((cp->protocol == IPPROTO_TCP) && (cp->old_state != cp->state) &&
((cp->state == IP_VS_TCP_S_FIN_WAIT) ||
- (cp->state == IP_VS_TCP_S_CLOSE)))))
+ (cp->state == IP_VS_TCP_S_CLOSE_WAIT) ||
+ (cp->state == IP_VS_TCP_S_TIME_WAIT)))))
ip_vs_sync_conn(cp);
cp->old_state = cp->state;
diff --git a/net/ipv4/ipvs/ip_vs_ctl.c b/net/ipv4/ipvs/ip_vs_ctl.c
index 94c5767c8e01..9a5ace0b4dd6 100644
--- a/net/ipv4/ipvs/ip_vs_ctl.c
+++ b/net/ipv4/ipvs/ip_vs_ctl.c
@@ -5,8 +5,6 @@
* high-performance and highly available server based on a
* cluster of servers.
*
- * Version: $Id: ip_vs_ctl.c,v 1.36 2003/06/08 09:31:19 wensong Exp $
- *
* Authors: Wensong Zhang <wensong@linuxvirtualserver.org>
* Peter Kese <peter.kese@ijs.si>
* Julian Anastasov <ja@ssi.bg>
diff --git a/net/ipv4/ipvs/ip_vs_dh.c b/net/ipv4/ipvs/ip_vs_dh.c
index dcf5d46aaa5e..8afc1503ed20 100644
--- a/net/ipv4/ipvs/ip_vs_dh.c
+++ b/net/ipv4/ipvs/ip_vs_dh.c
@@ -1,8 +1,6 @@
/*
* IPVS: Destination Hashing scheduling module
*
- * Version: $Id: ip_vs_dh.c,v 1.5 2002/09/15 08:14:08 wensong Exp $
- *
* Authors: Wensong Zhang <wensong@gnuchina.org>
*
* Inspired by the consistent hashing scheduler patch from
diff --git a/net/ipv4/ipvs/ip_vs_est.c b/net/ipv4/ipvs/ip_vs_est.c
index dfa0d713c801..bc04eedd6dbb 100644
--- a/net/ipv4/ipvs/ip_vs_est.c
+++ b/net/ipv4/ipvs/ip_vs_est.c
@@ -1,8 +1,6 @@
/*
* ip_vs_est.c: simple rate estimator for IPVS
*
- * Version: $Id: ip_vs_est.c,v 1.4 2002/11/30 01:50:35 wensong Exp $
- *
* Authors: Wensong Zhang <wensong@linuxvirtualserver.org>
*
* This program is free software; you can redistribute it and/or
diff --git a/net/ipv4/ipvs/ip_vs_ftp.c b/net/ipv4/ipvs/ip_vs_ftp.c
index 59aa166b7678..c1c758e4f733 100644
--- a/net/ipv4/ipvs/ip_vs_ftp.c
+++ b/net/ipv4/ipvs/ip_vs_ftp.c
@@ -1,8 +1,6 @@
/*
* ip_vs_ftp.c: IPVS ftp application module
*
- * Version: $Id: ip_vs_ftp.c,v 1.13 2002/09/15 08:14:08 wensong Exp $
- *
* Authors: Wensong Zhang <wensong@linuxvirtualserver.org>
*
* Changes:
diff --git a/net/ipv4/ipvs/ip_vs_lblc.c b/net/ipv4/ipvs/ip_vs_lblc.c
index 3888642706ad..0efa3db4b180 100644
--- a/net/ipv4/ipvs/ip_vs_lblc.c
+++ b/net/ipv4/ipvs/ip_vs_lblc.c
@@ -1,8 +1,6 @@
/*
* IPVS: Locality-Based Least-Connection scheduling module
*
- * Version: $Id: ip_vs_lblc.c,v 1.10 2002/09/15 08:14:08 wensong Exp $
- *
* Authors: Wensong Zhang <wensong@gnuchina.org>
*
* This program is free software; you can redistribute it and/or
diff --git a/net/ipv4/ipvs/ip_vs_lblcr.c b/net/ipv4/ipvs/ip_vs_lblcr.c
index daa260eb21cf..8e3bbeb45138 100644
--- a/net/ipv4/ipvs/ip_vs_lblcr.c
+++ b/net/ipv4/ipvs/ip_vs_lblcr.c
@@ -1,8 +1,6 @@
/*
* IPVS: Locality-Based Least-Connection with Replication scheduler
*
- * Version: $Id: ip_vs_lblcr.c,v 1.11 2002/09/15 08:14:08 wensong Exp $
- *
* Authors: Wensong Zhang <wensong@gnuchina.org>
*
* This program is free software; you can redistribute it and/or
diff --git a/net/ipv4/ipvs/ip_vs_lc.c b/net/ipv4/ipvs/ip_vs_lc.c
index d88fef90a641..ac9f08e065d5 100644
--- a/net/ipv4/ipvs/ip_vs_lc.c
+++ b/net/ipv4/ipvs/ip_vs_lc.c
@@ -1,8 +1,6 @@
/*
* IPVS: Least-Connection Scheduling module
*
- * Version: $Id: ip_vs_lc.c,v 1.10 2003/04/18 09:03:16 wensong Exp $
- *
* Authors: Wensong Zhang <wensong@linuxvirtualserver.org>
*
* This program is free software; you can redistribute it and/or
diff --git a/net/ipv4/ipvs/ip_vs_nq.c b/net/ipv4/ipvs/ip_vs_nq.c
index bc2a9e5f2a7b..a46bf258d420 100644
--- a/net/ipv4/ipvs/ip_vs_nq.c
+++ b/net/ipv4/ipvs/ip_vs_nq.c
@@ -1,8 +1,6 @@
/*
* IPVS: Never Queue scheduling module
*
- * Version: $Id: ip_vs_nq.c,v 1.2 2003/06/08 09:31:19 wensong Exp $
- *
* Authors: Wensong Zhang <wensong@linuxvirtualserver.org>
*
* This program is free software; you can redistribute it and/or
diff --git a/net/ipv4/ipvs/ip_vs_proto.c b/net/ipv4/ipvs/ip_vs_proto.c
index 4b1c16cbb16b..876714f23d65 100644
--- a/net/ipv4/ipvs/ip_vs_proto.c
+++ b/net/ipv4/ipvs/ip_vs_proto.c
@@ -1,8 +1,6 @@
/*
* ip_vs_proto.c: transport protocol load balancing support for IPVS
*
- * Version: $Id: ip_vs_proto.c,v 1.2 2003/04/18 09:03:16 wensong Exp $
- *
* Authors: Wensong Zhang <wensong@linuxvirtualserver.org>
* Julian Anastasov <ja@ssi.bg>
*
diff --git a/net/ipv4/ipvs/ip_vs_proto_ah.c b/net/ipv4/ipvs/ip_vs_proto_ah.c
index 4bf835e1d86d..73e0ea87c1f5 100644
--- a/net/ipv4/ipvs/ip_vs_proto_ah.c
+++ b/net/ipv4/ipvs/ip_vs_proto_ah.c
@@ -1,8 +1,6 @@
/*
* ip_vs_proto_ah.c: AH IPSec load balancing support for IPVS
*
- * Version: $Id: ip_vs_proto_ah.c,v 1.1 2003/07/04 15:04:37 wensong Exp $
- *
* Authors: Julian Anastasov <ja@ssi.bg>, February 2002
* Wensong Zhang <wensong@linuxvirtualserver.org>
*
diff --git a/net/ipv4/ipvs/ip_vs_proto_esp.c b/net/ipv4/ipvs/ip_vs_proto_esp.c
index db6a6b7b1a0b..21d70c8ffa54 100644
--- a/net/ipv4/ipvs/ip_vs_proto_esp.c
+++ b/net/ipv4/ipvs/ip_vs_proto_esp.c
@@ -1,8 +1,6 @@
/*
* ip_vs_proto_esp.c: ESP IPSec load balancing support for IPVS
*
- * Version: $Id: ip_vs_proto_esp.c,v 1.1 2003/07/04 15:04:37 wensong Exp $
- *
* Authors: Julian Anastasov <ja@ssi.bg>, February 2002
* Wensong Zhang <wensong@linuxvirtualserver.org>
*
diff --git a/net/ipv4/ipvs/ip_vs_proto_tcp.c b/net/ipv4/ipvs/ip_vs_proto_tcp.c
index b83dc14b0a4d..d0ea467986a0 100644
--- a/net/ipv4/ipvs/ip_vs_proto_tcp.c
+++ b/net/ipv4/ipvs/ip_vs_proto_tcp.c
@@ -1,8 +1,6 @@
/*
* ip_vs_proto_tcp.c: TCP load balancing support for IPVS
*
- * Version: $Id: ip_vs_proto_tcp.c,v 1.3 2002/11/30 01:50:35 wensong Exp $
- *
* Authors: Wensong Zhang <wensong@linuxvirtualserver.org>
* Julian Anastasov <ja@ssi.bg>
*
diff --git a/net/ipv4/ipvs/ip_vs_proto_udp.c b/net/ipv4/ipvs/ip_vs_proto_udp.c
index 75771cb3cd6f..c6be5d56823f 100644
--- a/net/ipv4/ipvs/ip_vs_proto_udp.c
+++ b/net/ipv4/ipvs/ip_vs_proto_udp.c
@@ -1,8 +1,6 @@
/*
* ip_vs_proto_udp.c: UDP load balancing support for IPVS
*
- * Version: $Id: ip_vs_proto_udp.c,v 1.3 2002/11/30 01:50:35 wensong Exp $
- *
* Authors: Wensong Zhang <wensong@linuxvirtualserver.org>
* Julian Anastasov <ja@ssi.bg>
*
diff --git a/net/ipv4/ipvs/ip_vs_rr.c b/net/ipv4/ipvs/ip_vs_rr.c
index 433f8a947924..c8db12d39e61 100644
--- a/net/ipv4/ipvs/ip_vs_rr.c
+++ b/net/ipv4/ipvs/ip_vs_rr.c
@@ -1,8 +1,6 @@
/*
* IPVS: Round-Robin Scheduling module
*
- * Version: $Id: ip_vs_rr.c,v 1.9 2002/09/15 08:14:08 wensong Exp $
- *
* Authors: Wensong Zhang <wensong@linuxvirtualserver.org>
* Peter Kese <peter.kese@ijs.si>
*
diff --git a/net/ipv4/ipvs/ip_vs_sched.c b/net/ipv4/ipvs/ip_vs_sched.c
index 121a32b1b756..b64767309855 100644
--- a/net/ipv4/ipvs/ip_vs_sched.c
+++ b/net/ipv4/ipvs/ip_vs_sched.c
@@ -5,8 +5,6 @@
* high-performance and highly available server based on a
* cluster of servers.
*
- * Version: $Id: ip_vs_sched.c,v 1.13 2003/05/10 03:05:23 wensong Exp $
- *
* Authors: Wensong Zhang <wensong@linuxvirtualserver.org>
* Peter Kese <peter.kese@ijs.si>
*
diff --git a/net/ipv4/ipvs/ip_vs_sed.c b/net/ipv4/ipvs/ip_vs_sed.c
index dd7c128f9db3..2a7d31358181 100644
--- a/net/ipv4/ipvs/ip_vs_sed.c
+++ b/net/ipv4/ipvs/ip_vs_sed.c
@@ -1,8 +1,6 @@
/*
* IPVS: Shortest Expected Delay scheduling module
*
- * Version: $Id: ip_vs_sed.c,v 1.1 2003/05/10 03:06:08 wensong Exp $
- *
* Authors: Wensong Zhang <wensong@linuxvirtualserver.org>
*
* This program is free software; you can redistribute it and/or
diff --git a/net/ipv4/ipvs/ip_vs_sh.c b/net/ipv4/ipvs/ip_vs_sh.c
index 1b25b00ef1e1..b8fdfac65001 100644
--- a/net/ipv4/ipvs/ip_vs_sh.c
+++ b/net/ipv4/ipvs/ip_vs_sh.c
@@ -1,8 +1,6 @@
/*
* IPVS: Source Hashing scheduling module
*
- * Version: $Id: ip_vs_sh.c,v 1.5 2002/09/15 08:14:08 wensong Exp $
- *
* Authors: Wensong Zhang <wensong@gnuchina.org>
*
* This program is free software; you can redistribute it and/or
diff --git a/net/ipv4/ipvs/ip_vs_sync.c b/net/ipv4/ipvs/ip_vs_sync.c
index eff54efe0351..45e9bd96c286 100644
--- a/net/ipv4/ipvs/ip_vs_sync.c
+++ b/net/ipv4/ipvs/ip_vs_sync.c
@@ -5,8 +5,6 @@
* high-performance and highly available server based on a
* cluster of servers.
*
- * Version: $Id: ip_vs_sync.c,v 1.13 2003/06/08 09:31:19 wensong Exp $
- *
* Authors: Wensong Zhang <wensong@linuxvirtualserver.org>
*
* ip_vs_sync: sync connection info from master load balancer to backups
@@ -29,10 +27,12 @@
#include <linux/in.h>
#include <linux/igmp.h> /* for ip_mc_join_group */
#include <linux/udp.h>
+#include <linux/err.h>
+#include <linux/kthread.h>
+#include <linux/wait.h>
#include <net/ip.h>
#include <net/sock.h>
-#include <asm/uaccess.h> /* for get_fs and set_fs */
#include <net/ip_vs.h>
@@ -68,8 +68,8 @@ struct ip_vs_sync_conn_options {
};
struct ip_vs_sync_thread_data {
- struct completion *startup;
- int state;
+ struct socket *sock;
+ char *buf;
};
#define SIMPLE_CONN_SIZE (sizeof(struct ip_vs_sync_conn))
@@ -140,18 +140,19 @@ volatile int ip_vs_backup_syncid = 0;
char ip_vs_master_mcast_ifn[IP_VS_IFNAME_MAXLEN];
char ip_vs_backup_mcast_ifn[IP_VS_IFNAME_MAXLEN];
+/* sync daemon tasks */
+static struct task_struct *sync_master_thread;
+static struct task_struct *sync_backup_thread;
+
/* multicast addr */
-static struct sockaddr_in mcast_addr;
+static struct sockaddr_in mcast_addr = {
+ .sin_family = AF_INET,
+ .sin_port = __constant_htons(IP_VS_SYNC_PORT),
+ .sin_addr.s_addr = __constant_htonl(IP_VS_SYNC_GROUP),
+};
-static inline void sb_queue_tail(struct ip_vs_sync_buff *sb)
-{
- spin_lock(&ip_vs_sync_lock);
- list_add_tail(&sb->list, &ip_vs_sync_queue);
- spin_unlock(&ip_vs_sync_lock);
-}
-
-static inline struct ip_vs_sync_buff * sb_dequeue(void)
+static inline struct ip_vs_sync_buff *sb_dequeue(void)
{
struct ip_vs_sync_buff *sb;
@@ -195,6 +196,16 @@ static inline void ip_vs_sync_buff_release(struct ip_vs_sync_buff *sb)
kfree(sb);
}
+static inline void sb_queue_tail(struct ip_vs_sync_buff *sb)
+{
+ spin_lock(&ip_vs_sync_lock);
+ if (ip_vs_sync_state & IP_VS_STATE_MASTER)
+ list_add_tail(&sb->list, &ip_vs_sync_queue);
+ else
+ ip_vs_sync_buff_release(sb);
+ spin_unlock(&ip_vs_sync_lock);
+}
+
/*
* Get the current sync buffer if it has been created for more
* than the specified time or the specified time is zero.
@@ -574,14 +585,17 @@ static int bind_mcastif_addr(struct socket *sock, char *ifname)
static struct socket * make_send_sock(void)
{
struct socket *sock;
+ int result;
/* First create a socket */
- if (sock_create_kern(PF_INET, SOCK_DGRAM, IPPROTO_UDP, &sock) < 0) {
+ result = sock_create_kern(PF_INET, SOCK_DGRAM, IPPROTO_UDP, &sock);
+ if (result < 0) {
IP_VS_ERR("Error during creation of socket; terminating\n");
- return NULL;
+ return ERR_PTR(result);
}
- if (set_mcast_if(sock->sk, ip_vs_master_mcast_ifn) < 0) {
+ result = set_mcast_if(sock->sk, ip_vs_master_mcast_ifn);
+ if (result < 0) {
IP_VS_ERR("Error setting outbound mcast interface\n");
goto error;
}
@@ -589,14 +603,15 @@ static struct socket * make_send_sock(void)
set_mcast_loop(sock->sk, 0);
set_mcast_ttl(sock->sk, 1);
- if (bind_mcastif_addr(sock, ip_vs_master_mcast_ifn) < 0) {
+ result = bind_mcastif_addr(sock, ip_vs_master_mcast_ifn);
+ if (result < 0) {
IP_VS_ERR("Error binding address of the mcast interface\n");
goto error;
}
- if (sock->ops->connect(sock,
- (struct sockaddr*)&mcast_addr,
- sizeof(struct sockaddr), 0) < 0) {
+ result = sock->ops->connect(sock, (struct sockaddr *) &mcast_addr,
+ sizeof(struct sockaddr), 0);
+ if (result < 0) {
IP_VS_ERR("Error connecting to the multicast addr\n");
goto error;
}
@@ -605,7 +620,7 @@ static struct socket * make_send_sock(void)
error:
sock_release(sock);
- return NULL;
+ return ERR_PTR(result);
}
@@ -615,27 +630,30 @@ static struct socket * make_send_sock(void)
static struct socket * make_receive_sock(void)
{
struct socket *sock;
+ int result;
/* First create a socket */
- if (sock_create_kern(PF_INET, SOCK_DGRAM, IPPROTO_UDP, &sock) < 0) {
+ result = sock_create_kern(PF_INET, SOCK_DGRAM, IPPROTO_UDP, &sock);
+ if (result < 0) {
IP_VS_ERR("Error during creation of socket; terminating\n");
- return NULL;
+ return ERR_PTR(result);
}
/* it is equivalent to the REUSEADDR option in user-space */
sock->sk->sk_reuse = 1;
- if (sock->ops->bind(sock,
- (struct sockaddr*)&mcast_addr,
- sizeof(struct sockaddr)) < 0) {
+ result = sock->ops->bind(sock, (struct sockaddr *) &mcast_addr,
+ sizeof(struct sockaddr));
+ if (result < 0) {
IP_VS_ERR("Error binding to the multicast addr\n");
goto error;
}
/* join the multicast group */
- if (join_mcast_group(sock->sk,
- (struct in_addr*)&mcast_addr.sin_addr,
- ip_vs_backup_mcast_ifn) < 0) {
+ result = join_mcast_group(sock->sk,
+ (struct in_addr *) &mcast_addr.sin_addr,
+ ip_vs_backup_mcast_ifn);
+ if (result < 0) {
IP_VS_ERR("Error joining to the multicast group\n");
goto error;
}
@@ -644,7 +662,7 @@ static struct socket * make_receive_sock(void)
error:
sock_release(sock);
- return NULL;
+ return ERR_PTR(result);
}
@@ -702,44 +720,29 @@ ip_vs_receive(struct socket *sock, char *buffer, const size_t buflen)
}
-static DECLARE_WAIT_QUEUE_HEAD(sync_wait);
-static pid_t sync_master_pid = 0;
-static pid_t sync_backup_pid = 0;
-
-static DECLARE_WAIT_QUEUE_HEAD(stop_sync_wait);
-static int stop_master_sync = 0;
-static int stop_backup_sync = 0;
-
-static void sync_master_loop(void)
+static int sync_thread_master(void *data)
{
- struct socket *sock;
+ struct ip_vs_sync_thread_data *tinfo = data;
struct ip_vs_sync_buff *sb;
- /* create the sending multicast socket */
- sock = make_send_sock();
- if (!sock)
- return;
-
IP_VS_INFO("sync thread started: state = MASTER, mcast_ifn = %s, "
"syncid = %d\n",
ip_vs_master_mcast_ifn, ip_vs_master_syncid);
- for (;;) {
- while ((sb=sb_dequeue())) {
- ip_vs_send_sync_msg(sock, sb->mesg);
+ while (!kthread_should_stop()) {
+ while ((sb = sb_dequeue())) {
+ ip_vs_send_sync_msg(tinfo->sock, sb->mesg);
ip_vs_sync_buff_release(sb);
}
/* check if entries stay in curr_sb for 2 seconds */
- if ((sb = get_curr_sync_buff(2*HZ))) {
- ip_vs_send_sync_msg(sock, sb->mesg);
+ sb = get_curr_sync_buff(2 * HZ);
+ if (sb) {
+ ip_vs_send_sync_msg(tinfo->sock, sb->mesg);
ip_vs_sync_buff_release(sb);
}
- if (stop_master_sync)
- break;
-
- msleep_interruptible(1000);
+ schedule_timeout_interruptible(HZ);
}
/* clean up the sync_buff queue */
@@ -753,267 +756,175 @@ static void sync_master_loop(void)
}
/* release the sending multicast socket */
- sock_release(sock);
+ sock_release(tinfo->sock);
+ kfree(tinfo);
+
+ return 0;
}
-static void sync_backup_loop(void)
+static int sync_thread_backup(void *data)
{
- struct socket *sock;
- char *buf;
+ struct ip_vs_sync_thread_data *tinfo = data;
int len;
- if (!(buf = kmalloc(sync_recv_mesg_maxlen, GFP_ATOMIC))) {
- IP_VS_ERR("sync_backup_loop: kmalloc error\n");
- return;
- }
-
- /* create the receiving multicast socket */
- sock = make_receive_sock();
- if (!sock)
- goto out;
-
IP_VS_INFO("sync thread started: state = BACKUP, mcast_ifn = %s, "
"syncid = %d\n",
ip_vs_backup_mcast_ifn, ip_vs_backup_syncid);
- for (;;) {
- /* do you have data now? */
- while (!skb_queue_empty(&(sock->sk->sk_receive_queue))) {
- if ((len =
- ip_vs_receive(sock, buf,
- sync_recv_mesg_maxlen)) <= 0) {
+ while (!kthread_should_stop()) {
+ wait_event_interruptible(*tinfo->sock->sk->sk_sleep,
+ !skb_queue_empty(&tinfo->sock->sk->sk_receive_queue)
+ || kthread_should_stop());
+
+ /* do we have data now? */
+ while (!skb_queue_empty(&(tinfo->sock->sk->sk_receive_queue))) {
+ len = ip_vs_receive(tinfo->sock, tinfo->buf,
+ sync_recv_mesg_maxlen);
+ if (len <= 0) {
IP_VS_ERR("receiving message error\n");
break;
}
- /* disable bottom half, because it accessed the data
+
+ /* disable bottom half, because it accesses the data
shared by softirq while getting/creating conns */
local_bh_disable();
- ip_vs_process_message(buf, len);
+ ip_vs_process_message(tinfo->buf, len);
local_bh_enable();
}
-
- if (stop_backup_sync)
- break;
-
- msleep_interruptible(1000);
}
/* release the sending multicast socket */
- sock_release(sock);
+ sock_release(tinfo->sock);
+ kfree(tinfo->buf);
+ kfree(tinfo);
- out:
- kfree(buf);
+ return 0;
}
-static void set_sync_pid(int sync_state, pid_t sync_pid)
-{
- if (sync_state == IP_VS_STATE_MASTER)
- sync_master_pid = sync_pid;
- else if (sync_state == IP_VS_STATE_BACKUP)
- sync_backup_pid = sync_pid;
-}
-
-static void set_stop_sync(int sync_state, int set)
+int start_sync_thread(int state, char *mcast_ifn, __u8 syncid)
{
- if (sync_state == IP_VS_STATE_MASTER)
- stop_master_sync = set;
- else if (sync_state == IP_VS_STATE_BACKUP)
- stop_backup_sync = set;
- else {
- stop_master_sync = set;
- stop_backup_sync = set;
- }
-}
+ struct ip_vs_sync_thread_data *tinfo;
+ struct task_struct **realtask, *task;
+ struct socket *sock;
+ char *name, *buf = NULL;
+ int (*threadfn)(void *data);
+ int result = -ENOMEM;
-static int sync_thread(void *startup)
-{
- DECLARE_WAITQUEUE(wait, current);
- mm_segment_t oldmm;
- int state;
- const char *name;
- struct ip_vs_sync_thread_data *tinfo = startup;
+ IP_VS_DBG(7, "%s: pid %d\n", __func__, task_pid_nr(current));
+ IP_VS_DBG(7, "Each ip_vs_sync_conn entry needs %Zd bytes\n",
+ sizeof(struct ip_vs_sync_conn));
- /* increase the module use count */
- ip_vs_use_count_inc();
+ if (state == IP_VS_STATE_MASTER) {
+ if (sync_master_thread)
+ return -EEXIST;
- if (ip_vs_sync_state & IP_VS_STATE_MASTER && !sync_master_pid) {
- state = IP_VS_STATE_MASTER;
+ strlcpy(ip_vs_master_mcast_ifn, mcast_ifn,
+ sizeof(ip_vs_master_mcast_ifn));
+ ip_vs_master_syncid = syncid;
+ realtask = &sync_master_thread;
name = "ipvs_syncmaster";
- } else if (ip_vs_sync_state & IP_VS_STATE_BACKUP && !sync_backup_pid) {
- state = IP_VS_STATE_BACKUP;
+ threadfn = sync_thread_master;
+ sock = make_send_sock();
+ } else if (state == IP_VS_STATE_BACKUP) {
+ if (sync_backup_thread)
+ return -EEXIST;
+
+ strlcpy(ip_vs_backup_mcast_ifn, mcast_ifn,
+ sizeof(ip_vs_backup_mcast_ifn));
+ ip_vs_backup_syncid = syncid;
+ realtask = &sync_backup_thread;
name = "ipvs_syncbackup";
+ threadfn = sync_thread_backup;
+ sock = make_receive_sock();
} else {
- IP_VS_BUG();
- ip_vs_use_count_dec();
return -EINVAL;
}
- daemonize(name);
-
- oldmm = get_fs();
- set_fs(KERNEL_DS);
-
- /* Block all signals */
- spin_lock_irq(&current->sighand->siglock);
- siginitsetinv(&current->blocked, 0);
- recalc_sigpending();
- spin_unlock_irq(&current->sighand->siglock);
+ if (IS_ERR(sock)) {
+ result = PTR_ERR(sock);
+ goto out;
+ }
- /* set the maximum length of sync message */
set_sync_mesg_maxlen(state);
+ if (state == IP_VS_STATE_BACKUP) {
+ buf = kmalloc(sync_recv_mesg_maxlen, GFP_KERNEL);
+ if (!buf)
+ goto outsocket;
+ }
- /* set up multicast address */
- mcast_addr.sin_family = AF_INET;
- mcast_addr.sin_port = htons(IP_VS_SYNC_PORT);
- mcast_addr.sin_addr.s_addr = htonl(IP_VS_SYNC_GROUP);
-
- add_wait_queue(&sync_wait, &wait);
-
- set_sync_pid(state, task_pid_nr(current));
- complete(tinfo->startup);
-
- /*
- * once we call the completion queue above, we should
- * null out that reference, since its allocated on the
- * stack of the creating kernel thread
- */
- tinfo->startup = NULL;
-
- /* processing master/backup loop here */
- if (state == IP_VS_STATE_MASTER)
- sync_master_loop();
- else if (state == IP_VS_STATE_BACKUP)
- sync_backup_loop();
- else IP_VS_BUG();
-
- remove_wait_queue(&sync_wait, &wait);
-
- /* thread exits */
-
- /*
- * If we weren't explicitly stopped, then we
- * exited in error, and should undo our state
- */
- if ((!stop_master_sync) && (!stop_backup_sync))
- ip_vs_sync_state -= tinfo->state;
+ tinfo = kmalloc(sizeof(*tinfo), GFP_KERNEL);
+ if (!tinfo)
+ goto outbuf;
- set_sync_pid(state, 0);
- IP_VS_INFO("sync thread stopped!\n");
+ tinfo->sock = sock;
+ tinfo->buf = buf;
- set_fs(oldmm);
+ task = kthread_run(threadfn, tinfo, name);
+ if (IS_ERR(task)) {
+ result = PTR_ERR(task);
+ goto outtinfo;
+ }
- /* decrease the module use count */
- ip_vs_use_count_dec();
+ /* mark as active */
+ *realtask = task;
+ ip_vs_sync_state |= state;
- set_stop_sync(state, 0);
- wake_up(&stop_sync_wait);
+ /* increase the module use count */
+ ip_vs_use_count_inc();
- /*
- * we need to free the structure that was allocated
- * for us in start_sync_thread
- */
- kfree(tinfo);
return 0;
-}
-
-
-static int fork_sync_thread(void *startup)
-{
- pid_t pid;
-
- /* fork the sync thread here, then the parent process of the
- sync thread is the init process after this thread exits. */
- repeat:
- if ((pid = kernel_thread(sync_thread, startup, 0)) < 0) {
- IP_VS_ERR("could not create sync_thread due to %d... "
- "retrying.\n", pid);
- msleep_interruptible(1000);
- goto repeat;
- }
- return 0;
+outtinfo:
+ kfree(tinfo);
+outbuf:
+ kfree(buf);
+outsocket:
+ sock_release(sock);
+out:
+ return result;
}
-int start_sync_thread(int state, char *mcast_ifn, __u8 syncid)
+int stop_sync_thread(int state)
{
- DECLARE_COMPLETION_ONSTACK(startup);
- pid_t pid;
- struct ip_vs_sync_thread_data *tinfo;
-
- if ((state == IP_VS_STATE_MASTER && sync_master_pid) ||
- (state == IP_VS_STATE_BACKUP && sync_backup_pid))
- return -EEXIST;
-
- /*
- * Note that tinfo will be freed in sync_thread on exit
- */
- tinfo = kmalloc(sizeof(struct ip_vs_sync_thread_data), GFP_KERNEL);
- if (!tinfo)
- return -ENOMEM;
-
IP_VS_DBG(7, "%s: pid %d\n", __func__, task_pid_nr(current));
- IP_VS_DBG(7, "Each ip_vs_sync_conn entry need %Zd bytes\n",
- sizeof(struct ip_vs_sync_conn));
- ip_vs_sync_state |= state;
if (state == IP_VS_STATE_MASTER) {
- strlcpy(ip_vs_master_mcast_ifn, mcast_ifn,
- sizeof(ip_vs_master_mcast_ifn));
- ip_vs_master_syncid = syncid;
- } else {
- strlcpy(ip_vs_backup_mcast_ifn, mcast_ifn,
- sizeof(ip_vs_backup_mcast_ifn));
- ip_vs_backup_syncid = syncid;
- }
-
- tinfo->state = state;
- tinfo->startup = &startup;
-
- repeat:
- if ((pid = kernel_thread(fork_sync_thread, tinfo, 0)) < 0) {
- IP_VS_ERR("could not create fork_sync_thread due to %d... "
- "retrying.\n", pid);
- msleep_interruptible(1000);
- goto repeat;
- }
-
- wait_for_completion(&startup);
-
- return 0;
-}
+ if (!sync_master_thread)
+ return -ESRCH;
+ IP_VS_INFO("stopping master sync thread %d ...\n",
+ task_pid_nr(sync_master_thread));
-int stop_sync_thread(int state)
-{
- DECLARE_WAITQUEUE(wait, current);
+ /*
+ * The lock synchronizes with sb_queue_tail(), which may run in
+ * softirq context (hence the _bh variants), so that no sync buffers
+ * are queued once we have started stopping the master sync daemon.
+ */
- if ((state == IP_VS_STATE_MASTER && !sync_master_pid) ||
- (state == IP_VS_STATE_BACKUP && !sync_backup_pid))
- return -ESRCH;
+ spin_lock_bh(&ip_vs_sync_lock);
+ ip_vs_sync_state &= ~IP_VS_STATE_MASTER;
+ spin_unlock_bh(&ip_vs_sync_lock);
+ kthread_stop(sync_master_thread);
+ sync_master_thread = NULL;
+ } else if (state == IP_VS_STATE_BACKUP) {
+ if (!sync_backup_thread)
+ return -ESRCH;
+
+ IP_VS_INFO("stopping backup sync thread %d ...\n",
+ task_pid_nr(sync_backup_thread));
+
+ ip_vs_sync_state &= ~IP_VS_STATE_BACKUP;
+ kthread_stop(sync_backup_thread);
+ sync_backup_thread = NULL;
+ } else {
+ return -EINVAL;
+ }
- IP_VS_DBG(7, "%s: pid %d\n", __func__, task_pid_nr(current));
- IP_VS_INFO("stopping sync thread %d ...\n",
- (state == IP_VS_STATE_MASTER) ?
- sync_master_pid : sync_backup_pid);
-
- __set_current_state(TASK_UNINTERRUPTIBLE);
- add_wait_queue(&stop_sync_wait, &wait);
- set_stop_sync(state, 1);
- ip_vs_sync_state -= state;
- wake_up(&sync_wait);
- schedule();
- __set_current_state(TASK_RUNNING);
- remove_wait_queue(&stop_sync_wait, &wait);
-
- /* Note: no need to reap the sync thread, because its parent
- process is the init process */
-
- if ((state == IP_VS_STATE_MASTER && stop_master_sync) ||
- (state == IP_VS_STATE_BACKUP && stop_backup_sync))
- IP_VS_BUG();
+ /* decrease the module use count */
+ ip_vs_use_count_dec();
return 0;
}
diff --git a/net/ipv4/ipvs/ip_vs_wlc.c b/net/ipv4/ipvs/ip_vs_wlc.c
index 8a9d913261d8..772c3cb4eca1 100644
--- a/net/ipv4/ipvs/ip_vs_wlc.c
+++ b/net/ipv4/ipvs/ip_vs_wlc.c
@@ -1,8 +1,6 @@
/*
* IPVS: Weighted Least-Connection Scheduling module
*
- * Version: $Id: ip_vs_wlc.c,v 1.13 2003/04/18 09:03:16 wensong Exp $
- *
* Authors: Wensong Zhang <wensong@linuxvirtualserver.org>
* Peter Kese <peter.kese@ijs.si>
*
diff --git a/net/ipv4/ipvs/ip_vs_wrr.c b/net/ipv4/ipvs/ip_vs_wrr.c
index 85c680add6df..1d6932d7dc97 100644
--- a/net/ipv4/ipvs/ip_vs_wrr.c
+++ b/net/ipv4/ipvs/ip_vs_wrr.c
@@ -1,8 +1,6 @@
/*
* IPVS: Weighted Round-Robin Scheduling module
*
- * Version: $Id: ip_vs_wrr.c,v 1.12 2002/09/15 08:14:08 wensong Exp $
- *
* Authors: Wensong Zhang <wensong@linuxvirtualserver.org>
*
* This program is free software; you can redistribute it and/or
diff --git a/net/ipv4/ipvs/ip_vs_xmit.c b/net/ipv4/ipvs/ip_vs_xmit.c
index f63006caea03..9892d4aca42e 100644
--- a/net/ipv4/ipvs/ip_vs_xmit.c
+++ b/net/ipv4/ipvs/ip_vs_xmit.c
@@ -1,8 +1,6 @@
/*
* ip_vs_xmit.c: various packet transmitters for IPVS
*
- * Version: $Id: ip_vs_xmit.c,v 1.2 2002/11/30 01:50:35 wensong Exp $
- *
* Authors: Wensong Zhang <wensong@linuxvirtualserver.org>
* Julian Anastasov <ja@ssi.bg>
*
diff --git a/net/ipv4/netfilter/Kconfig b/net/ipv4/netfilter/Kconfig
index 2767841a8cef..f23e60c93ef9 100644
--- a/net/ipv4/netfilter/Kconfig
+++ b/net/ipv4/netfilter/Kconfig
@@ -213,8 +213,7 @@ config IP_NF_TARGET_NETMAP
help
NETMAP is an implementation of static 1:1 NAT mapping of network
addresses. It maps the network address part, while keeping the host
- address part intact. It is similar to Fast NAT, except that
- Netfilter's connection tracking doesn't work well with Fast NAT.
+ address part intact.
To compile it as a module, choose M here. If unsure, say N.
@@ -365,6 +364,18 @@ config IP_NF_RAW
If you want to compile it as a module, say M here and read
<file:Documentation/kbuild/modules.txt>. If unsure, say `N'.
+# security table for MAC policy
+config IP_NF_SECURITY
+ tristate "Security table"
+ depends on IP_NF_IPTABLES
+ depends on SECURITY
+ default m if NETFILTER_ADVANCED=n
+ help
+ This option adds a `security' table to iptables, for use
+ with Mandatory Access Control (MAC) policy.
+
+ If unsure, say N.
+
# ARP tables
config IP_NF_ARPTABLES
tristate "ARP tables support"
diff --git a/net/ipv4/netfilter/Makefile b/net/ipv4/netfilter/Makefile
index d9b92fbf5579..3f31291f37ce 100644
--- a/net/ipv4/netfilter/Makefile
+++ b/net/ipv4/netfilter/Makefile
@@ -42,6 +42,7 @@ obj-$(CONFIG_IP_NF_FILTER) += iptable_filter.o
obj-$(CONFIG_IP_NF_MANGLE) += iptable_mangle.o
obj-$(CONFIG_NF_NAT) += iptable_nat.o
obj-$(CONFIG_IP_NF_RAW) += iptable_raw.o
+obj-$(CONFIG_IP_NF_SECURITY) += iptable_security.o
# matches
obj-$(CONFIG_IP_NF_MATCH_ADDRTYPE) += ipt_addrtype.o
diff --git a/net/ipv4/netfilter/ip_queue.c b/net/ipv4/netfilter/ip_queue.c
index 26a37cedcf2e..aa33a4a7a715 100644
--- a/net/ipv4/netfilter/ip_queue.c
+++ b/net/ipv4/netfilter/ip_queue.c
@@ -156,7 +156,6 @@ ipq_build_packet_message(struct nf_queue_entry *entry, int *errp)
case IPQ_COPY_META:
case IPQ_COPY_NONE:
size = NLMSG_SPACE(sizeof(*pmsg));
- data_len = 0;
break;
case IPQ_COPY_PACKET:
@@ -224,8 +223,6 @@ ipq_build_packet_message(struct nf_queue_entry *entry, int *errp)
return skb;
nlmsg_failure:
- if (skb)
- kfree_skb(skb);
*errp = -EINVAL;
printk(KERN_ERR "ip_queue: error creating packet message\n");
return NULL;
diff --git a/net/ipv4/netfilter/iptable_security.c b/net/ipv4/netfilter/iptable_security.c
new file mode 100644
index 000000000000..2b472ac2263a
--- /dev/null
+++ b/net/ipv4/netfilter/iptable_security.c
@@ -0,0 +1,180 @@
+/*
+ * "security" table
+ *
+ * This is for use by Mandatory Access Control (MAC) security models,
+ * which need to be able to manage security policy in separate context
+ * to DAC.
+ *
+ * Based on iptable_mangle.c
+ *
+ * Copyright (C) 1999 Paul `Rusty' Russell & Michael J. Neuling
+ * Copyright (C) 2000-2004 Netfilter Core Team <coreteam <at> netfilter.org>
+ * Copyright (C) 2008 Red Hat, Inc., James Morris <jmorris <at> redhat.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+#include <linux/module.h>
+#include <linux/netfilter_ipv4/ip_tables.h>
+#include <net/ip.h>
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("James Morris <jmorris <at> redhat.com>");
+MODULE_DESCRIPTION("iptables security table, for MAC rules");
+
+#define SECURITY_VALID_HOOKS ((1 << NF_INET_LOCAL_IN) | \
+ (1 << NF_INET_FORWARD) | \
+ (1 << NF_INET_LOCAL_OUT))
+
+static struct
+{
+ struct ipt_replace repl;
+ struct ipt_standard entries[3];
+ struct ipt_error term;
+} initial_table __initdata = {
+ .repl = {
+ .name = "security",
+ .valid_hooks = SECURITY_VALID_HOOKS,
+ .num_entries = 4,
+ .size = sizeof(struct ipt_standard) * 3 + sizeof(struct ipt_error),
+ .hook_entry = {
+ [NF_INET_LOCAL_IN] = 0,
+ [NF_INET_FORWARD] = sizeof(struct ipt_standard),
+ [NF_INET_LOCAL_OUT] = sizeof(struct ipt_standard) * 2,
+ },
+ .underflow = {
+ [NF_INET_LOCAL_IN] = 0,
+ [NF_INET_FORWARD] = sizeof(struct ipt_standard),
+ [NF_INET_LOCAL_OUT] = sizeof(struct ipt_standard) * 2,
+ },
+ },
+ .entries = {
+ IPT_STANDARD_INIT(NF_ACCEPT), /* LOCAL_IN */
+ IPT_STANDARD_INIT(NF_ACCEPT), /* FORWARD */
+ IPT_STANDARD_INIT(NF_ACCEPT), /* LOCAL_OUT */
+ },
+ .term = IPT_ERROR_INIT, /* ERROR */
+};
+
+static struct xt_table security_table = {
+ .name = "security",
+ .valid_hooks = SECURITY_VALID_HOOKS,
+ .lock = __RW_LOCK_UNLOCKED(security_table.lock),
+ .me = THIS_MODULE,
+ .af = AF_INET,
+};
+
+static unsigned int
+ipt_local_in_hook(unsigned int hook,
+ struct sk_buff *skb,
+ const struct net_device *in,
+ const struct net_device *out,
+ int (*okfn)(struct sk_buff *))
+{
+ return ipt_do_table(skb, hook, in, out,
+ nf_local_in_net(in, out)->ipv4.iptable_security);
+}
+
+static unsigned int
+ipt_forward_hook(unsigned int hook,
+ struct sk_buff *skb,
+ const struct net_device *in,
+ const struct net_device *out,
+ int (*okfn)(struct sk_buff *))
+{
+ return ipt_do_table(skb, hook, in, out,
+ nf_forward_net(in, out)->ipv4.iptable_security);
+}
+
+static unsigned int
+ipt_local_out_hook(unsigned int hook,
+ struct sk_buff *skb,
+ const struct net_device *in,
+ const struct net_device *out,
+ int (*okfn)(struct sk_buff *))
+{
+ /* Somebody is playing with raw sockets. */
+ if (skb->len < sizeof(struct iphdr)
+ || ip_hdrlen(skb) < sizeof(struct iphdr)) {
+ if (net_ratelimit())
+ printk(KERN_INFO "iptable_security: ignoring short "
+ "SOCK_RAW packet.\n");
+ return NF_ACCEPT;
+ }
+ return ipt_do_table(skb, hook, in, out,
+ nf_local_out_net(in, out)->ipv4.iptable_security);
+}
+
+static struct nf_hook_ops ipt_ops[] __read_mostly = {
+ {
+ .hook = ipt_local_in_hook,
+ .owner = THIS_MODULE,
+ .pf = PF_INET,
+ .hooknum = NF_INET_LOCAL_IN,
+ .priority = NF_IP_PRI_SECURITY,
+ },
+ {
+ .hook = ipt_forward_hook,
+ .owner = THIS_MODULE,
+ .pf = PF_INET,
+ .hooknum = NF_INET_FORWARD,
+ .priority = NF_IP_PRI_SECURITY,
+ },
+ {
+ .hook = ipt_local_out_hook,
+ .owner = THIS_MODULE,
+ .pf = PF_INET,
+ .hooknum = NF_INET_LOCAL_OUT,
+ .priority = NF_IP_PRI_SECURITY,
+ },
+};
+
+static int __net_init iptable_security_net_init(struct net *net)
+{
+ net->ipv4.iptable_security =
+ ipt_register_table(net, &security_table, &initial_table.repl);
+
+ if (IS_ERR(net->ipv4.iptable_security))
+ return PTR_ERR(net->ipv4.iptable_security);
+
+ return 0;
+}
+
+static void __net_exit iptable_security_net_exit(struct net *net)
+{
+ ipt_unregister_table(net->ipv4.iptable_security);
+}
+
+static struct pernet_operations iptable_security_net_ops = {
+ .init = iptable_security_net_init,
+ .exit = iptable_security_net_exit,
+};
+
+static int __init iptable_security_init(void)
+{
+ int ret;
+
+ ret = register_pernet_subsys(&iptable_security_net_ops);
+ if (ret < 0)
+ return ret;
+
+ ret = nf_register_hooks(ipt_ops, ARRAY_SIZE(ipt_ops));
+ if (ret < 0)
+ goto cleanup_table;
+
+ return ret;
+
+cleanup_table:
+ unregister_pernet_subsys(&iptable_security_net_ops);
+ return ret;
+}
+
+static void __exit iptable_security_fini(void)
+{
+ nf_unregister_hooks(ipt_ops, ARRAY_SIZE(ipt_ops));
+ unregister_pernet_subsys(&iptable_security_net_ops);
+}
+
+module_init(iptable_security_init);
+module_exit(iptable_security_fini);
diff --git a/net/ipv4/netfilter/nf_conntrack_proto_icmp.c b/net/ipv4/netfilter/nf_conntrack_proto_icmp.c
index 78ab19accace..97791048fa9b 100644
--- a/net/ipv4/netfilter/nf_conntrack_proto_icmp.c
+++ b/net/ipv4/netfilter/nf_conntrack_proto_icmp.c
@@ -87,9 +87,8 @@ static int icmp_packet(struct nf_conn *ct,
means this will only run once even if count hits zero twice
(theoretically possible with SMP) */
if (CTINFO2DIR(ctinfo) == IP_CT_DIR_REPLY) {
- if (atomic_dec_and_test(&ct->proto.icmp.count)
- && del_timer(&ct->timeout))
- ct->timeout.function((unsigned long)ct);
+ if (atomic_dec_and_test(&ct->proto.icmp.count))
+ nf_ct_kill_acct(ct, ctinfo, skb);
} else {
atomic_inc(&ct->proto.icmp.count);
nf_conntrack_event_cache(IPCT_PROTOINFO_VOLATILE, skb);
diff --git a/net/ipv4/netfilter/nf_nat_proto_sctp.c b/net/ipv4/netfilter/nf_nat_proto_sctp.c
index 82e4c0e286b8..65e470bc6123 100644
--- a/net/ipv4/netfilter/nf_nat_proto_sctp.c
+++ b/net/ipv4/netfilter/nf_nat_proto_sctp.c
@@ -36,7 +36,7 @@ sctp_manip_pkt(struct sk_buff *skb,
sctp_sctphdr_t *hdr;
unsigned int hdroff = iphdroff + iph->ihl*4;
__be32 oldip, newip;
- u32 crc32;
+ __be32 crc32;
if (!skb_make_writable(skb, hdroff + sizeof(*hdr)))
return false;
@@ -61,7 +61,7 @@ sctp_manip_pkt(struct sk_buff *skb,
crc32 = sctp_update_cksum((u8 *)skb->data, skb_headlen(skb),
crc32);
crc32 = sctp_end_cksum(crc32);
- hdr->checksum = htonl(crc32);
+ hdr->checksum = crc32;
return true;
}
diff --git a/net/ipv4/netfilter/nf_nat_snmp_basic.c b/net/ipv4/netfilter/nf_nat_snmp_basic.c
index 7750c97fde7b..ffeaffc3fffe 100644
--- a/net/ipv4/netfilter/nf_nat_snmp_basic.c
+++ b/net/ipv4/netfilter/nf_nat_snmp_basic.c
@@ -439,8 +439,8 @@ static unsigned char asn1_oid_decode(struct asn1_ctx *ctx,
unsigned int *len)
{
unsigned long subid;
- unsigned int size;
unsigned long *optr;
+ size_t size;
size = eoc - ctx->pointer + 1;
diff --git a/net/ipv4/proc.c b/net/ipv4/proc.c
index 552169b41b16..834356ea99df 100644
--- a/net/ipv4/proc.c
+++ b/net/ipv4/proc.c
@@ -7,8 +7,6 @@
* PROC file system. It is mainly used for debugging and
* statistics.
*
- * Version: $Id: proc.c,v 1.45 2001/05/16 16:45:35 davem Exp $
- *
* Authors: Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
* Gerald J. Heim, <heim@peanuts.informatik.uni-tuebingen.de>
* Fred Baumgarten, <dc6iq@insu1.etec.uni-karlsruhe.de>
@@ -73,32 +71,7 @@ static int sockstat_seq_show(struct seq_file *seq, void *v)
static int sockstat_seq_open(struct inode *inode, struct file *file)
{
- int err;
- struct net *net;
-
- err = -ENXIO;
- net = get_proc_net(inode);
- if (net == NULL)
- goto err_net;
-
- err = single_open(file, sockstat_seq_show, net);
- if (err < 0)
- goto err_open;
-
- return 0;
-
-err_open:
- put_net(net);
-err_net:
- return err;
-}
-
-static int sockstat_seq_release(struct inode *inode, struct file *file)
-{
- struct net *net = ((struct seq_file *)file->private_data)->private;
-
- put_net(net);
- return single_release(inode, file);
+ return single_open_net(inode, file, sockstat_seq_show);
}
static const struct file_operations sockstat_seq_fops = {
@@ -106,7 +79,7 @@ static const struct file_operations sockstat_seq_fops = {
.open = sockstat_seq_open,
.read = seq_read,
.llseek = seq_lseek,
- .release = sockstat_seq_release,
+ .release = single_release_net,
};
/* snmp items */
@@ -268,11 +241,12 @@ static void icmpmsg_put(struct seq_file *seq)
int j, i, count;
static int out[PERLINE];
+ struct net *net = seq->private;
count = 0;
for (i = 0; i < ICMPMSG_MIB_MAX; i++) {
- if (snmp_fold_field((void **) icmpmsg_statistics, i))
+ if (snmp_fold_field((void **) net->mib.icmpmsg_statistics, i))
out[count++] = i;
if (count < PERLINE)
continue;
@@ -284,7 +258,7 @@ static void icmpmsg_put(struct seq_file *seq)
seq_printf(seq, "\nIcmpMsg: ");
for (j = 0; j < PERLINE; ++j)
seq_printf(seq, " %lu",
- snmp_fold_field((void **) icmpmsg_statistics,
+ snmp_fold_field((void **) net->mib.icmpmsg_statistics,
out[j]));
seq_putc(seq, '\n');
}
@@ -296,7 +270,7 @@ static void icmpmsg_put(struct seq_file *seq)
seq_printf(seq, "\nIcmpMsg:");
for (j = 0; j < count; ++j)
seq_printf(seq, " %lu", snmp_fold_field((void **)
- icmpmsg_statistics, out[j]));
+ net->mib.icmpmsg_statistics, out[j]));
}
#undef PERLINE
@@ -305,6 +279,7 @@ static void icmpmsg_put(struct seq_file *seq)
static void icmp_put(struct seq_file *seq)
{
int i;
+ struct net *net = seq->private;
seq_puts(seq, "\nIcmp: InMsgs InErrors");
for (i=0; icmpmibmap[i].name != NULL; i++)
@@ -313,18 +288,18 @@ static void icmp_put(struct seq_file *seq)
for (i=0; icmpmibmap[i].name != NULL; i++)
seq_printf(seq, " Out%s", icmpmibmap[i].name);
seq_printf(seq, "\nIcmp: %lu %lu",
- snmp_fold_field((void **) icmp_statistics, ICMP_MIB_INMSGS),
- snmp_fold_field((void **) icmp_statistics, ICMP_MIB_INERRORS));
+ snmp_fold_field((void **) net->mib.icmp_statistics, ICMP_MIB_INMSGS),
+ snmp_fold_field((void **) net->mib.icmp_statistics, ICMP_MIB_INERRORS));
for (i=0; icmpmibmap[i].name != NULL; i++)
seq_printf(seq, " %lu",
- snmp_fold_field((void **) icmpmsg_statistics,
+ snmp_fold_field((void **) net->mib.icmpmsg_statistics,
icmpmibmap[i].index));
seq_printf(seq, " %lu %lu",
- snmp_fold_field((void **) icmp_statistics, ICMP_MIB_OUTMSGS),
- snmp_fold_field((void **) icmp_statistics, ICMP_MIB_OUTERRORS));
+ snmp_fold_field((void **) net->mib.icmp_statistics, ICMP_MIB_OUTMSGS),
+ snmp_fold_field((void **) net->mib.icmp_statistics, ICMP_MIB_OUTERRORS));
for (i=0; icmpmibmap[i].name != NULL; i++)
seq_printf(seq, " %lu",
- snmp_fold_field((void **) icmpmsg_statistics,
+ snmp_fold_field((void **) net->mib.icmpmsg_statistics,
icmpmibmap[i].index | 0x100));
}
@@ -334,6 +309,7 @@ static void icmp_put(struct seq_file *seq)
static int snmp_seq_show(struct seq_file *seq, void *v)
{
int i;
+ struct net *net = seq->private;
seq_puts(seq, "Ip: Forwarding DefaultTTL");
@@ -341,12 +317,12 @@ static int snmp_seq_show(struct seq_file *seq, void *v)
seq_printf(seq, " %s", snmp4_ipstats_list[i].name);
seq_printf(seq, "\nIp: %d %d",
- IPV4_DEVCONF_ALL(&init_net, FORWARDING) ? 1 : 2,
+ IPV4_DEVCONF_ALL(net, FORWARDING) ? 1 : 2,
sysctl_ip_default_ttl);
for (i = 0; snmp4_ipstats_list[i].name != NULL; i++)
seq_printf(seq, " %lu",
- snmp_fold_field((void **)ip_statistics,
+ snmp_fold_field((void **)net->mib.ip_statistics,
snmp4_ipstats_list[i].entry));
icmp_put(seq); /* RFC 2011 compatibility */
@@ -361,11 +337,11 @@ static int snmp_seq_show(struct seq_file *seq, void *v)
/* MaxConn field is signed, RFC 2012 */
if (snmp4_tcp_list[i].entry == TCP_MIB_MAXCONN)
seq_printf(seq, " %ld",
- snmp_fold_field((void **)tcp_statistics,
+ snmp_fold_field((void **)net->mib.tcp_statistics,
snmp4_tcp_list[i].entry));
else
seq_printf(seq, " %lu",
- snmp_fold_field((void **)tcp_statistics,
+ snmp_fold_field((void **)net->mib.tcp_statistics,
snmp4_tcp_list[i].entry));
}
@@ -376,7 +352,7 @@ static int snmp_seq_show(struct seq_file *seq, void *v)
seq_puts(seq, "\nUdp:");
for (i = 0; snmp4_udp_list[i].name != NULL; i++)
seq_printf(seq, " %lu",
- snmp_fold_field((void **)udp_statistics,
+ snmp_fold_field((void **)net->mib.udp_statistics,
snmp4_udp_list[i].entry));
/* the UDP and UDP-Lite MIBs are the same */
@@ -387,7 +363,7 @@ static int snmp_seq_show(struct seq_file *seq, void *v)
seq_puts(seq, "\nUdpLite:");
for (i = 0; snmp4_udp_list[i].name != NULL; i++)
seq_printf(seq, " %lu",
- snmp_fold_field((void **)udplite_statistics,
+ snmp_fold_field((void **)net->mib.udplite_statistics,
snmp4_udp_list[i].entry));
seq_putc(seq, '\n');
@@ -396,7 +372,7 @@ static int snmp_seq_show(struct seq_file *seq, void *v)
static int snmp_seq_open(struct inode *inode, struct file *file)
{
- return single_open(file, snmp_seq_show, NULL);
+ return single_open_net(inode, file, snmp_seq_show);
}
static const struct file_operations snmp_seq_fops = {
@@ -404,7 +380,7 @@ static const struct file_operations snmp_seq_fops = {
.open = snmp_seq_open,
.read = seq_read,
.llseek = seq_lseek,
- .release = single_release,
+ .release = single_release_net,
};
@@ -415,6 +391,7 @@ static const struct file_operations snmp_seq_fops = {
static int netstat_seq_show(struct seq_file *seq, void *v)
{
int i;
+ struct net *net = seq->private;
seq_puts(seq, "TcpExt:");
for (i = 0; snmp4_net_list[i].name != NULL; i++)
@@ -423,7 +400,7 @@ static int netstat_seq_show(struct seq_file *seq, void *v)
seq_puts(seq, "\nTcpExt:");
for (i = 0; snmp4_net_list[i].name != NULL; i++)
seq_printf(seq, " %lu",
- snmp_fold_field((void **)net_statistics,
+ snmp_fold_field((void **)net->mib.net_statistics,
snmp4_net_list[i].entry));
seq_puts(seq, "\nIpExt:");
@@ -433,7 +410,7 @@ static int netstat_seq_show(struct seq_file *seq, void *v)
seq_puts(seq, "\nIpExt:");
for (i = 0; snmp4_ipextstats_list[i].name != NULL; i++)
seq_printf(seq, " %lu",
- snmp_fold_field((void **)ip_statistics,
+ snmp_fold_field((void **)net->mib.ip_statistics,
snmp4_ipextstats_list[i].entry));
seq_putc(seq, '\n');
@@ -442,7 +419,7 @@ static int netstat_seq_show(struct seq_file *seq, void *v)
static int netstat_seq_open(struct inode *inode, struct file *file)
{
- return single_open(file, netstat_seq_show, NULL);
+ return single_open_net(inode, file, netstat_seq_show);
}
static const struct file_operations netstat_seq_fops = {
@@ -450,18 +427,32 @@ static const struct file_operations netstat_seq_fops = {
.open = netstat_seq_open,
.read = seq_read,
.llseek = seq_lseek,
- .release = single_release,
+ .release = single_release_net,
};
static __net_init int ip_proc_init_net(struct net *net)
{
if (!proc_net_fops_create(net, "sockstat", S_IRUGO, &sockstat_seq_fops))
- return -ENOMEM;
+ goto out_sockstat;
+ if (!proc_net_fops_create(net, "netstat", S_IRUGO, &netstat_seq_fops))
+ goto out_netstat;
+ if (!proc_net_fops_create(net, "snmp", S_IRUGO, &snmp_seq_fops))
+ goto out_snmp;
+
return 0;
+
+out_snmp:
+ proc_net_remove(net, "netstat");
+out_netstat:
+ proc_net_remove(net, "sockstat");
+out_sockstat:
+ return -ENOMEM;
}
static __net_exit void ip_proc_exit_net(struct net *net)
{
+ proc_net_remove(net, "snmp");
+ proc_net_remove(net, "netstat");
proc_net_remove(net, "sockstat");
}
@@ -472,24 +463,6 @@ static __net_initdata struct pernet_operations ip_proc_ops = {
int __init ip_misc_proc_init(void)
{
- int rc = 0;
-
- if (register_pernet_subsys(&ip_proc_ops))
- goto out_pernet;
-
- if (!proc_net_fops_create(&init_net, "netstat", S_IRUGO, &netstat_seq_fops))
- goto out_netstat;
-
- if (!proc_net_fops_create(&init_net, "snmp", S_IRUGO, &snmp_seq_fops))
- goto out_snmp;
-out:
- return rc;
-out_snmp:
- proc_net_remove(&init_net, "netstat");
-out_netstat:
- unregister_pernet_subsys(&ip_proc_ops);
-out_pernet:
- rc = -ENOMEM;
- goto out;
+ return register_pernet_subsys(&ip_proc_ops);
}
diff --git a/net/ipv4/protocol.c b/net/ipv4/protocol.c
index 971ab9356e51..ea50da0649fd 100644
--- a/net/ipv4/protocol.c
+++ b/net/ipv4/protocol.c
@@ -5,8 +5,6 @@
*
* INET protocol dispatch tables.
*
- * Version: $Id: protocol.c,v 1.14 2001/05/18 02:25:49 davem Exp $
- *
* Authors: Ross Biro
* Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
*
diff --git a/net/ipv4/raw.c b/net/ipv4/raw.c
index 37a1ecd9d600..cd975743bcd2 100644
--- a/net/ipv4/raw.c
+++ b/net/ipv4/raw.c
@@ -5,8 +5,6 @@
*
* RAW - implementation of IP "raw" sockets.
*
- * Version: $Id: raw.c,v 1.64 2002/02/01 22:01:04 davem Exp $
- *
* Authors: Ross Biro
* Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
*
@@ -322,6 +320,7 @@ static int raw_send_hdrinc(struct sock *sk, void *from, size_t length,
unsigned int flags)
{
struct inet_sock *inet = inet_sk(sk);
+ struct net *net = sock_net(sk);
struct iphdr *iph;
struct sk_buff *skb;
unsigned int iphlen;
@@ -370,7 +369,7 @@ static int raw_send_hdrinc(struct sock *sk, void *from, size_t length,
iph->check = ip_fast_csum((unsigned char *)iph, iph->ihl);
}
if (iph->protocol == IPPROTO_ICMP)
- icmp_out_count(((struct icmphdr *)
+ icmp_out_count(net, ((struct icmphdr *)
skb_transport_header(skb))->type);
err = NF_HOOK(PF_INET, NF_INET_LOCAL_OUT, skb, NULL, rt->u.dst.dev,
@@ -386,7 +385,7 @@ error_fault:
err = -EFAULT;
kfree_skb(skb);
error:
- IP_INC_STATS(IPSTATS_MIB_OUTDISCARDS);
+ IP_INC_STATS(net, IPSTATS_MIB_OUTDISCARDS);
return err;
}
@@ -608,12 +607,11 @@ static void raw_close(struct sock *sk, long timeout)
sk_common_release(sk);
}
-static int raw_destroy(struct sock *sk)
+static void raw_destroy(struct sock *sk)
{
lock_sock(sk);
ip_flush_pending_frames(sk);
release_sock(sk);
- return 0;
}
/* This gets rid of all the nasties in af_inet. -DaveM */
@@ -947,7 +945,7 @@ static int raw_seq_show(struct seq_file *seq, void *v)
if (v == SEQ_START_TOKEN)
seq_printf(seq, " sl local_address rem_address st tx_queue "
"rx_queue tr tm->when retrnsmt uid timeout "
- "inode drops\n");
+ "inode ref pointer drops\n");
else
raw_sock_seq_show(seq, v, raw_seq_private(seq)->bucket);
return 0;
diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index 96be336064fb..e4ab0ac94f92 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -5,8 +5,6 @@
*
* ROUTE - implementation of the IP router.
*
- * Version: $Id: route.c,v 1.103 2002/01/12 07:44:09 davem Exp $
- *
* Authors: Ross Biro
* Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
* Alan Cox, <gw4pts@gw4pts.ampr.org>
@@ -134,7 +132,6 @@ static int ip_rt_secret_interval __read_mostly = 10 * 60 * HZ;
static void rt_worker_func(struct work_struct *work);
static DECLARE_DELAYED_WORK(expires_work, rt_worker_func);
-static struct timer_list rt_secret_timer;
/*
* Interface to generic destination cache.
@@ -253,20 +250,25 @@ static inline void rt_hash_lock_init(void)
static struct rt_hash_bucket *rt_hash_table __read_mostly;
static unsigned rt_hash_mask __read_mostly;
static unsigned int rt_hash_log __read_mostly;
-static atomic_t rt_genid __read_mostly;
static DEFINE_PER_CPU(struct rt_cache_stat, rt_cache_stat);
#define RT_CACHE_STAT_INC(field) \
(__raw_get_cpu_var(rt_cache_stat).field++)
-static inline unsigned int rt_hash(__be32 daddr, __be32 saddr, int idx)
+static inline unsigned int rt_hash(__be32 daddr, __be32 saddr, int idx,
+ int genid)
{
return jhash_3words((__force u32)(__be32)(daddr),
(__force u32)(__be32)(saddr),
- idx, atomic_read(&rt_genid))
+ idx, genid)
& rt_hash_mask;
}
+static inline int rt_genid(struct net *net)
+{
+ return atomic_read(&net->ipv4.rt_genid);
+}
+
#ifdef CONFIG_PROC_FS
struct rt_cache_iter_state {
struct seq_net_private p;
@@ -336,7 +338,7 @@ static void *rt_cache_seq_start(struct seq_file *seq, loff_t *pos)
struct rt_cache_iter_state *st = seq->private;
if (*pos)
return rt_cache_get_idx(seq, *pos - 1);
- st->genid = atomic_read(&rt_genid);
+ st->genid = rt_genid(seq_file_net(seq));
return SEQ_START_TOKEN;
}
@@ -683,6 +685,11 @@ static inline int compare_netns(struct rtable *rt1, struct rtable *rt2)
return dev_net(rt1->u.dst.dev) == dev_net(rt2->u.dst.dev);
}
+static inline int rt_is_expired(struct rtable *rth)
+{
+ return rth->rt_genid != rt_genid(dev_net(rth->u.dst.dev));
+}
+
/*
* Perform a full scan of hash table and free all entries.
* Can be called by a softirq or a process.
@@ -692,6 +699,7 @@ static void rt_do_flush(int process_context)
{
unsigned int i;
struct rtable *rth, *next;
+ struct rtable * tail;
for (i = 0; i <= rt_hash_mask; i++) {
if (process_context && need_resched())
@@ -701,11 +709,39 @@ static void rt_do_flush(int process_context)
continue;
spin_lock_bh(rt_hash_lock_addr(i));
+#ifdef CONFIG_NET_NS
+ {
+ struct rtable ** prev, * p;
+
+ rth = rt_hash_table[i].chain;
+
+ /* defer releasing the head of the list after spin_unlock */
+ for (tail = rth; tail; tail = tail->u.dst.rt_next)
+ if (!rt_is_expired(tail))
+ break;
+ if (rth != tail)
+ rt_hash_table[i].chain = tail;
+
+ /* call rt_free on entries after the tail requiring flush */
+ prev = &rt_hash_table[i].chain;
+ for (p = *prev; p; p = next) {
+ next = p->u.dst.rt_next;
+ if (!rt_is_expired(p)) {
+ prev = &p->u.dst.rt_next;
+ } else {
+ *prev = next;
+ rt_free(p);
+ }
+ }
+ }
+#else
rth = rt_hash_table[i].chain;
rt_hash_table[i].chain = NULL;
+ tail = NULL;
+#endif
spin_unlock_bh(rt_hash_lock_addr(i));
- for (; rth; rth = next) {
+ for (; rth != tail; rth = next) {
next = rth->u.dst.rt_next;
rt_free(rth);
}
@@ -738,7 +774,7 @@ static void rt_check_expire(void)
continue;
spin_lock_bh(rt_hash_lock_addr(i));
while ((rth = *rthp) != NULL) {
- if (rth->rt_genid != atomic_read(&rt_genid)) {
+ if (rt_is_expired(rth)) {
*rthp = rth->u.dst.rt_next;
rt_free(rth);
continue;
@@ -781,21 +817,21 @@ static void rt_worker_func(struct work_struct *work)
* many times (2^24) without giving recent rt_genid.
* Jenkins hash is strong enough that litle changes of rt_genid are OK.
*/
-static void rt_cache_invalidate(void)
+static void rt_cache_invalidate(struct net *net)
{
unsigned char shuffle;
get_random_bytes(&shuffle, sizeof(shuffle));
- atomic_add(shuffle + 1U, &rt_genid);
+ atomic_add(shuffle + 1U, &net->ipv4.rt_genid);
}
/*
* delay < 0 : invalidate cache (fast : entries will be deleted later)
* delay >= 0 : invalidate & flush cache (can be long)
*/
-void rt_cache_flush(int delay)
+void rt_cache_flush(struct net *net, int delay)
{
- rt_cache_invalidate();
+ rt_cache_invalidate(net);
if (delay >= 0)
rt_do_flush(!in_softirq());
}
@@ -803,10 +839,11 @@ void rt_cache_flush(int delay)
/*
* We change rt_genid and let gc do the cleanup
*/
-static void rt_secret_rebuild(unsigned long dummy)
+static void rt_secret_rebuild(unsigned long __net)
{
- rt_cache_invalidate();
- mod_timer(&rt_secret_timer, jiffies + ip_rt_secret_interval);
+ struct net *net = (struct net *)__net;
+ rt_cache_invalidate(net);
+ mod_timer(&net->ipv4.rt_secret_timer, jiffies + ip_rt_secret_interval);
}
/*
@@ -882,7 +919,7 @@ static int rt_garbage_collect(struct dst_ops *ops)
rthp = &rt_hash_table[k].chain;
spin_lock_bh(rt_hash_lock_addr(k));
while ((rth = *rthp) != NULL) {
- if (rth->rt_genid == atomic_read(&rt_genid) &&
+ if (!rt_is_expired(rth) &&
!rt_may_expire(rth, tmo, expire)) {
tmo >>= 1;
rthp = &rth->u.dst.rt_next;
@@ -964,7 +1001,7 @@ restart:
spin_lock_bh(rt_hash_lock_addr(hash));
while ((rth = *rthp) != NULL) {
- if (rth->rt_genid != atomic_read(&rt_genid)) {
+ if (rt_is_expired(rth)) {
*rthp = rth->u.dst.rt_next;
rt_free(rth);
continue;
@@ -1140,7 +1177,7 @@ static void rt_del(unsigned hash, struct rtable *rt)
spin_lock_bh(rt_hash_lock_addr(hash));
ip_rt_put(rt);
while ((aux = *rthp) != NULL) {
- if (aux == rt || (aux->rt_genid != atomic_read(&rt_genid))) {
+ if (aux == rt || rt_is_expired(aux)) {
*rthp = aux->u.dst.rt_next;
rt_free(aux);
continue;
@@ -1182,7 +1219,8 @@ void ip_rt_redirect(__be32 old_gw, __be32 daddr, __be32 new_gw,
for (i = 0; i < 2; i++) {
for (k = 0; k < 2; k++) {
- unsigned hash = rt_hash(daddr, skeys[i], ikeys[k]);
+ unsigned hash = rt_hash(daddr, skeys[i], ikeys[k],
+ rt_genid(net));
rthp=&rt_hash_table[hash].chain;
@@ -1194,7 +1232,7 @@ void ip_rt_redirect(__be32 old_gw, __be32 daddr, __be32 new_gw,
rth->fl.fl4_src != skeys[i] ||
rth->fl.oif != ikeys[k] ||
rth->fl.iif != 0 ||
- rth->rt_genid != atomic_read(&rt_genid) ||
+ rt_is_expired(rth) ||
!net_eq(dev_net(rth->u.dst.dev), net)) {
rthp = &rth->u.dst.rt_next;
continue;
@@ -1233,7 +1271,7 @@ void ip_rt_redirect(__be32 old_gw, __be32 daddr, __be32 new_gw,
rt->u.dst.neighbour = NULL;
rt->u.dst.hh = NULL;
rt->u.dst.xfrm = NULL;
- rt->rt_genid = atomic_read(&rt_genid);
+ rt->rt_genid = rt_genid(net);
rt->rt_flags |= RTCF_REDIRECTED;
/* Gateway is different ... */
@@ -1297,7 +1335,8 @@ static struct dst_entry *ipv4_negative_advice(struct dst_entry *dst)
} else if ((rt->rt_flags & RTCF_REDIRECTED) ||
rt->u.dst.expires) {
unsigned hash = rt_hash(rt->fl.fl4_dst, rt->fl.fl4_src,
- rt->fl.oif);
+ rt->fl.oif,
+ rt_genid(dev_net(dst->dev)));
#if RT_CACHE_DEBUG >= 1
printk(KERN_DEBUG "ipv4_negative_advice: redirect to "
NIPQUAD_FMT "/%02x dropped\n",
@@ -1390,7 +1429,8 @@ static int ip_error(struct sk_buff *skb)
break;
case ENETUNREACH:
code = ICMP_NET_UNREACH;
- IP_INC_STATS_BH(IPSTATS_MIB_INNOROUTES);
+ IP_INC_STATS_BH(dev_net(rt->u.dst.dev),
+ IPSTATS_MIB_INNOROUTES);
break;
case EACCES:
code = ICMP_PKT_FILTERED;
@@ -1446,7 +1486,8 @@ unsigned short ip_rt_frag_needed(struct net *net, struct iphdr *iph,
for (k = 0; k < 2; k++) {
for (i = 0; i < 2; i++) {
- unsigned hash = rt_hash(daddr, skeys[i], ikeys[k]);
+ unsigned hash = rt_hash(daddr, skeys[i], ikeys[k],
+ rt_genid(net));
rcu_read_lock();
for (rth = rcu_dereference(rt_hash_table[hash].chain); rth;
@@ -1461,7 +1502,7 @@ unsigned short ip_rt_frag_needed(struct net *net, struct iphdr *iph,
rth->fl.iif != 0 ||
dst_metric_locked(&rth->u.dst, RTAX_MTU) ||
!net_eq(dev_net(rth->u.dst.dev), net) ||
- rth->rt_genid != atomic_read(&rt_genid))
+ rt_is_expired(rth))
continue;
if (new_mtu < 68 || new_mtu >= old_mtu) {
@@ -1696,7 +1737,7 @@ static int ip_route_input_mc(struct sk_buff *skb, __be32 daddr, __be32 saddr,
rth->fl.oif = 0;
rth->rt_gateway = daddr;
rth->rt_spec_dst= spec_dst;
- rth->rt_genid = atomic_read(&rt_genid);
+ rth->rt_genid = rt_genid(dev_net(dev));
rth->rt_flags = RTCF_MULTICAST;
rth->rt_type = RTN_MULTICAST;
if (our) {
@@ -1711,7 +1752,7 @@ static int ip_route_input_mc(struct sk_buff *skb, __be32 daddr, __be32 saddr,
RT_CACHE_STAT_INC(in_slow_mc);
in_dev_put(in_dev);
- hash = rt_hash(daddr, saddr, dev->ifindex);
+ hash = rt_hash(daddr, saddr, dev->ifindex, rt_genid(dev_net(dev)));
return rt_intern_hash(hash, rth, &skb->rtable);
e_nobufs:
@@ -1837,7 +1878,7 @@ static int __mkroute_input(struct sk_buff *skb,
rth->u.dst.input = ip_forward;
rth->u.dst.output = ip_output;
- rth->rt_genid = atomic_read(&rt_genid);
+ rth->rt_genid = rt_genid(dev_net(rth->u.dst.dev));
rt_set_nexthop(rth, res, itag);
@@ -1872,7 +1913,8 @@ static int ip_mkroute_input(struct sk_buff *skb,
return err;
/* put it into the cache */
- hash = rt_hash(daddr, saddr, fl->iif);
+ hash = rt_hash(daddr, saddr, fl->iif,
+ rt_genid(dev_net(rth->u.dst.dev)));
return rt_intern_hash(hash, rth, &skb->rtable);
}
@@ -1998,7 +2040,7 @@ local_input:
goto e_nobufs;
rth->u.dst.output= ip_rt_bug;
- rth->rt_genid = atomic_read(&rt_genid);
+ rth->rt_genid = rt_genid(net);
atomic_set(&rth->u.dst.__refcnt, 1);
rth->u.dst.flags= DST_HOST;
@@ -2028,7 +2070,7 @@ local_input:
rth->rt_flags &= ~RTCF_LOCAL;
}
rth->rt_type = res.type;
- hash = rt_hash(daddr, saddr, fl.iif);
+ hash = rt_hash(daddr, saddr, fl.iif, rt_genid(net));
err = rt_intern_hash(hash, rth, &skb->rtable);
goto done;
@@ -2079,7 +2121,7 @@ int ip_route_input(struct sk_buff *skb, __be32 daddr, __be32 saddr,
net = dev_net(dev);
tos &= IPTOS_RT_MASK;
- hash = rt_hash(daddr, saddr, iif);
+ hash = rt_hash(daddr, saddr, iif, rt_genid(net));
rcu_read_lock();
for (rth = rcu_dereference(rt_hash_table[hash].chain); rth;
@@ -2091,7 +2133,7 @@ int ip_route_input(struct sk_buff *skb, __be32 daddr, __be32 saddr,
(rth->fl.fl4_tos ^ tos)) == 0 &&
rth->fl.mark == skb->mark &&
net_eq(dev_net(rth->u.dst.dev), net) &&
- rth->rt_genid == atomic_read(&rt_genid)) {
+ !rt_is_expired(rth)) {
dst_use(&rth->u.dst, jiffies);
RT_CACHE_STAT_INC(in_hit);
rcu_read_unlock();
@@ -2219,7 +2261,7 @@ static int __mkroute_output(struct rtable **result,
rth->rt_spec_dst= fl->fl4_src;
rth->u.dst.output=ip_output;
- rth->rt_genid = atomic_read(&rt_genid);
+ rth->rt_genid = rt_genid(dev_net(dev_out));
RT_CACHE_STAT_INC(out_slow_tot);
@@ -2268,7 +2310,8 @@ static int ip_mkroute_output(struct rtable **rp,
int err = __mkroute_output(&rth, res, fl, oldflp, dev_out, flags);
unsigned hash;
if (err == 0) {
- hash = rt_hash(oldflp->fl4_dst, oldflp->fl4_src, oldflp->oif);
+ hash = rt_hash(oldflp->fl4_dst, oldflp->fl4_src, oldflp->oif,
+ rt_genid(dev_net(dev_out)));
err = rt_intern_hash(hash, rth, rp);
}
@@ -2480,7 +2523,7 @@ int __ip_route_output_key(struct net *net, struct rtable **rp,
unsigned hash;
struct rtable *rth;
- hash = rt_hash(flp->fl4_dst, flp->fl4_src, flp->oif);
+ hash = rt_hash(flp->fl4_dst, flp->fl4_src, flp->oif, rt_genid(net));
rcu_read_lock_bh();
for (rth = rcu_dereference(rt_hash_table[hash].chain); rth;
@@ -2493,7 +2536,7 @@ int __ip_route_output_key(struct net *net, struct rtable **rp,
!((rth->fl.fl4_tos ^ flp->fl4_tos) &
(IPTOS_RT_MASK | RTO_ONLINK)) &&
net_eq(dev_net(rth->u.dst.dev), net) &&
- rth->rt_genid == atomic_read(&rt_genid)) {
+ !rt_is_expired(rth)) {
dst_use(&rth->u.dst, jiffies);
RT_CACHE_STAT_INC(out_hit);
rcu_read_unlock_bh();
@@ -2524,7 +2567,7 @@ static struct dst_ops ipv4_dst_blackhole_ops = {
};
-static int ipv4_dst_blackhole(struct rtable **rp, struct flowi *flp)
+static int ipv4_dst_blackhole(struct net *net, struct rtable **rp, struct flowi *flp)
{
struct rtable *ort = *rp;
struct rtable *rt = (struct rtable *)
@@ -2548,7 +2591,7 @@ static int ipv4_dst_blackhole(struct rtable **rp, struct flowi *flp)
rt->idev = ort->idev;
if (rt->idev)
in_dev_hold(rt->idev);
- rt->rt_genid = atomic_read(&rt_genid);
+ rt->rt_genid = rt_genid(net);
rt->rt_flags = ort->rt_flags;
rt->rt_type = ort->rt_type;
rt->rt_dst = ort->rt_dst;
@@ -2584,7 +2627,7 @@ int ip_route_output_flow(struct net *net, struct rtable **rp, struct flowi *flp,
err = __xfrm_lookup((struct dst_entry **)rp, flp, sk,
flags ? XFRM_LOOKUP_WAIT : 0);
if (err == -EREMOTE)
- err = ipv4_dst_blackhole(rp, flp);
+ err = ipv4_dst_blackhole(net, rp, flp);
return err;
}
@@ -2803,7 +2846,7 @@ int ip_rt_dump(struct sk_buff *skb, struct netlink_callback *cb)
rt = rcu_dereference(rt->u.dst.rt_next), idx++) {
if (!net_eq(dev_net(rt->u.dst.dev), net) || idx < s_idx)
continue;
- if (rt->rt_genid != atomic_read(&rt_genid))
+ if (rt_is_expired(rt))
continue;
skb->dst = dst_clone(&rt->u.dst);
if (rt_fill_info(skb, NETLINK_CB(cb->skb).pid,
@@ -2827,19 +2870,25 @@ done:
void ip_rt_multicast_event(struct in_device *in_dev)
{
- rt_cache_flush(0);
+ rt_cache_flush(dev_net(in_dev->dev), 0);
}
#ifdef CONFIG_SYSCTL
-static int flush_delay;
-
-static int ipv4_sysctl_rtcache_flush(ctl_table *ctl, int write,
+static int ipv4_sysctl_rtcache_flush(ctl_table *__ctl, int write,
struct file *filp, void __user *buffer,
size_t *lenp, loff_t *ppos)
{
if (write) {
- proc_dointvec(ctl, write, filp, buffer, lenp, ppos);
- rt_cache_flush(flush_delay);
+ int flush_delay = 0;
+ ctl_table ctl;
+ struct net *net;
+
+ memcpy(&ctl, __ctl, sizeof(ctl));
+ ctl.data = &flush_delay;
+ proc_dointvec(&ctl, write, filp, buffer, lenp, ppos);
+
+ net = (struct net *)__ctl->extra1;
+ rt_cache_flush(net, flush_delay);
return 0;
}
@@ -2855,25 +2904,18 @@ static int ipv4_sysctl_rtcache_flush_strategy(ctl_table *table,
size_t newlen)
{
int delay;
+ struct net *net;
if (newlen != sizeof(int))
return -EINVAL;
if (get_user(delay, (int __user *)newval))
return -EFAULT;
- rt_cache_flush(delay);
+ net = (struct net *)table->extra1;
+ rt_cache_flush(net, delay);
return 0;
}
ctl_table ipv4_route_table[] = {
{
- .ctl_name = NET_IPV4_ROUTE_FLUSH,
- .procname = "flush",
- .data = &flush_delay,
- .maxlen = sizeof(int),
- .mode = 0200,
- .proc_handler = &ipv4_sysctl_rtcache_flush,
- .strategy = &ipv4_sysctl_rtcache_flush_strategy,
- },
- {
.ctl_name = NET_IPV4_ROUTE_GC_THRESH,
.procname = "gc_thresh",
.data = &ipv4_dst_ops.gc_thresh,
@@ -3011,8 +3053,97 @@ ctl_table ipv4_route_table[] = {
},
{ .ctl_name = 0 }
};
+
+static __net_initdata struct ctl_path ipv4_route_path[] = {
+ { .procname = "net", .ctl_name = CTL_NET, },
+ { .procname = "ipv4", .ctl_name = NET_IPV4, },
+ { .procname = "route", .ctl_name = NET_IPV4_ROUTE, },
+ { },
+};
+
+
+static struct ctl_table ipv4_route_flush_table[] = {
+ {
+ .ctl_name = NET_IPV4_ROUTE_FLUSH,
+ .procname = "flush",
+ .maxlen = sizeof(int),
+ .mode = 0200,
+ .proc_handler = &ipv4_sysctl_rtcache_flush,
+ .strategy = &ipv4_sysctl_rtcache_flush_strategy,
+ },
+ { .ctl_name = 0 },
+};
+
+static __net_init int sysctl_route_net_init(struct net *net)
+{
+ struct ctl_table *tbl;
+
+ tbl = ipv4_route_flush_table;
+ if (net != &init_net) {
+ tbl = kmemdup(tbl, sizeof(ipv4_route_flush_table), GFP_KERNEL);
+ if (tbl == NULL)
+ goto err_dup;
+ }
+ tbl[0].extra1 = net;
+
+ net->ipv4.route_hdr =
+ register_net_sysctl_table(net, ipv4_route_path, tbl);
+ if (net->ipv4.route_hdr == NULL)
+ goto err_reg;
+ return 0;
+
+err_reg:
+ if (tbl != ipv4_route_flush_table)
+ kfree(tbl);
+err_dup:
+ return -ENOMEM;
+}
+
+static __net_exit void sysctl_route_net_exit(struct net *net)
+{
+ struct ctl_table *tbl;
+
+ tbl = net->ipv4.route_hdr->ctl_table_arg;
+ unregister_net_sysctl_table(net->ipv4.route_hdr);
+ BUG_ON(tbl == ipv4_route_flush_table);
+ kfree(tbl);
+}
+
+static __net_initdata struct pernet_operations sysctl_route_ops = {
+ .init = sysctl_route_net_init,
+ .exit = sysctl_route_net_exit,
+};
#endif
+
+static __net_init int rt_secret_timer_init(struct net *net)
+{
+ atomic_set(&net->ipv4.rt_genid,
+ (int) ((num_physpages ^ (num_physpages>>8)) ^
+ (jiffies ^ (jiffies >> 7))));
+
+ net->ipv4.rt_secret_timer.function = rt_secret_rebuild;
+ net->ipv4.rt_secret_timer.data = (unsigned long)net;
+ init_timer_deferrable(&net->ipv4.rt_secret_timer);
+
+ net->ipv4.rt_secret_timer.expires =
+ jiffies + net_random() % ip_rt_secret_interval +
+ ip_rt_secret_interval;
+ add_timer(&net->ipv4.rt_secret_timer);
+ return 0;
+}
+
+static __net_exit void rt_secret_timer_exit(struct net *net)
+{
+ del_timer_sync(&net->ipv4.rt_secret_timer);
+}
+
+static __net_initdata struct pernet_operations rt_secret_timer_ops = {
+ .init = rt_secret_timer_init,
+ .exit = rt_secret_timer_exit,
+};
+
+
#ifdef CONFIG_NET_CLS_ROUTE
struct ip_rt_acct *ip_rt_acct __read_mostly;
#endif /* CONFIG_NET_CLS_ROUTE */
@@ -3031,9 +3162,6 @@ int __init ip_rt_init(void)
{
int rc = 0;
- atomic_set(&rt_genid, (int) ((num_physpages ^ (num_physpages>>8)) ^
- (jiffies ^ (jiffies >> 7))));
-
#ifdef CONFIG_NET_CLS_ROUTE
ip_rt_acct = __alloc_percpu(256 * sizeof(struct ip_rt_acct));
if (!ip_rt_acct)
@@ -3065,19 +3193,14 @@ int __init ip_rt_init(void)
devinet_init();
ip_fib_init();
- rt_secret_timer.function = rt_secret_rebuild;
- rt_secret_timer.data = 0;
- init_timer_deferrable(&rt_secret_timer);
-
/* All the timers, started at system startup tend
to synchronize. Perturb it a bit.
*/
schedule_delayed_work(&expires_work,
net_random() % ip_rt_gc_interval + ip_rt_gc_interval);
- rt_secret_timer.expires = jiffies + net_random() % ip_rt_secret_interval +
- ip_rt_secret_interval;
- add_timer(&rt_secret_timer);
+ if (register_pernet_subsys(&rt_secret_timer_ops))
+ printk(KERN_ERR "Unable to setup rt_secret_timer\n");
if (ip_rt_proc_init())
printk(KERN_ERR "Unable to create route proc files\n");
@@ -3087,6 +3210,9 @@ int __init ip_rt_init(void)
#endif
rtnl_register(PF_INET, RTM_GETROUTE, inet_rtm_getroute, NULL);
+#ifdef CONFIG_SYSCTL
+ register_pernet_subsys(&sysctl_route_ops);
+#endif
return rc;
}
diff --git a/net/ipv4/syncookies.c b/net/ipv4/syncookies.c
index d182a2a26291..51bc24d3b8a7 100644
--- a/net/ipv4/syncookies.c
+++ b/net/ipv4/syncookies.c
@@ -8,8 +8,6 @@
* modify it under the terms of the GNU General Public License
* as published by the Free Software Foundation; either version
* 2 of the License, or (at your option) any later version.
- *
- * $Id: syncookies.c,v 1.18 2002/02/01 22:01:04 davem Exp $
*/
#include <linux/tcp.h>
@@ -175,7 +173,7 @@ __u32 cookie_v4_init_sequence(struct sock *sk, struct sk_buff *skb, __u16 *mssp)
;
*mssp = msstab[mssind] + 1;
- NET_INC_STATS_BH(LINUX_MIB_SYNCOOKIESSENT);
+ NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_SYNCOOKIESSENT);
return secure_tcp_syn_cookie(iph->saddr, iph->daddr,
th->source, th->dest, ntohl(th->seq),
@@ -271,11 +269,11 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb,
if (time_after(jiffies, tp->last_synq_overflow + TCP_TIMEOUT_INIT) ||
(mss = cookie_check(skb, cookie)) == 0) {
- NET_INC_STATS_BH(LINUX_MIB_SYNCOOKIESFAILED);
+ NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_SYNCOOKIESFAILED);
goto out;
}
- NET_INC_STATS_BH(LINUX_MIB_SYNCOOKIESRECV);
+ NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_SYNCOOKIESRECV);
/* check for timestamp cookie support */
memset(&tcp_opt, 0, sizeof(tcp_opt));
diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c
index c437f804ee38..14ef202a2254 100644
--- a/net/ipv4/sysctl_net_ipv4.c
+++ b/net/ipv4/sysctl_net_ipv4.c
@@ -1,8 +1,6 @@
/*
* sysctl_net_ipv4.c: sysctl interface to net IPV4 subsystem.
*
- * $Id: sysctl_net_ipv4.c,v 1.50 2001/10/20 00:00:11 davem Exp $
- *
* Begun April 1, 1996, Mike Shaver.
* Added /proc/sys/net/ipv4 directory entry (empty =) ). [MS]
*/
@@ -795,7 +793,8 @@ static struct ctl_table ipv4_net_table[] = {
.data = &init_net.ipv4.sysctl_icmp_ratelimit,
.maxlen = sizeof(int),
.mode = 0644,
- .proc_handler = &proc_dointvec
+ .proc_handler = &proc_dointvec_ms_jiffies,
+ .strategy = &sysctl_ms_jiffies
},
{
.ctl_name = NET_IPV4_ICMP_RATEMASK,
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index 850825dc86e6..0b491bf03db4 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -5,8 +5,6 @@
*
* Implementation of the Transmission Control Protocol(TCP).
*
- * Version: $Id: tcp.c,v 1.216 2002/02/01 22:01:04 davem Exp $
- *
* Authors: Ross Biro
* Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
* Mark Evans, <evansmp@uhura.aston.ac.uk>
@@ -255,6 +253,7 @@
#include <linux/init.h>
#include <linux/fs.h>
#include <linux/skbuff.h>
+#include <linux/scatterlist.h>
#include <linux/splice.h>
#include <linux/net.h>
#include <linux/socket.h>
@@ -278,8 +277,6 @@
int sysctl_tcp_fin_timeout __read_mostly = TCP_FIN_TIMEOUT;
-DEFINE_SNMP_STAT(struct tcp_mib, tcp_statistics) __read_mostly;
-
atomic_t tcp_orphan_count = ATOMIC_INIT(0);
EXPORT_SYMBOL_GPL(tcp_orphan_count);
@@ -317,10 +314,10 @@ int tcp_memory_pressure __read_mostly;
EXPORT_SYMBOL(tcp_memory_pressure);
-void tcp_enter_memory_pressure(void)
+void tcp_enter_memory_pressure(struct sock *sk)
{
if (!tcp_memory_pressure) {
- NET_INC_STATS(LINUX_MIB_TCPMEMORYPRESSURES);
+ NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPMEMORYPRESSURES);
tcp_memory_pressure = 1;
}
}
@@ -345,8 +342,8 @@ unsigned int tcp_poll(struct file *file, struct socket *sock, poll_table *wait)
return inet_csk_listen_poll(sk);
/* Socket is not locked. We are protected from async events
- by poll logic and correct handling of state changes
- made by another threads is impossible in any case.
+ * by poll logic and correct handling of state changes
+ * made by other threads is impossible in any case.
*/
mask = 0;
@@ -372,10 +369,10 @@ unsigned int tcp_poll(struct file *file, struct socket *sock, poll_table *wait)
* in state CLOSE_WAIT. One solution is evident --- to set POLLHUP
* if and only if shutdown has been made in both directions.
* Actually, it is interesting to look how Solaris and DUX
- * solve this dilemma. I would prefer, if PULLHUP were maskable,
+ * solve this dilemma. I would prefer, if POLLHUP were maskable,
* then we could set it on SND_SHUTDOWN. BTW examples given
* in Stevens' books assume exactly this behaviour, it explains
- * why PULLHUP is incompatible with POLLOUT. --ANK
+ * why POLLHUP is incompatible with POLLOUT. --ANK
*
* NOTE. Check for TCP_CLOSE is added. The goal is to prevent
* blocking on fresh not-connected or disconnected socket. --ANK
@@ -650,7 +647,7 @@ struct sk_buff *sk_stream_alloc_skb(struct sock *sk, int size, gfp_t gfp)
}
__kfree_skb(skb);
} else {
- sk->sk_prot->enter_memory_pressure();
+ sk->sk_prot->enter_memory_pressure(sk);
sk_stream_moderate_sndbuf(sk);
}
return NULL;
@@ -1154,7 +1151,7 @@ static void tcp_prequeue_process(struct sock *sk)
struct sk_buff *skb;
struct tcp_sock *tp = tcp_sk(sk);
- NET_INC_STATS_USER(LINUX_MIB_TCPPREQUEUED);
+ NET_INC_STATS_USER(sock_net(sk), LINUX_MIB_TCPPREQUEUED);
/* RX process wants to run with disabled BHs, though it is not
* necessary */
@@ -1208,7 +1205,8 @@ int tcp_read_sock(struct sock *sk, read_descriptor_t *desc,
return -ENOTCONN;
while ((skb = tcp_recv_skb(sk, seq, &offset)) != NULL) {
if (offset < skb->len) {
- size_t used, len;
+ int used;
+ size_t len;
len = skb->len - offset;
/* Stop reading if we hit a patch of urgent data */
@@ -1475,7 +1473,7 @@ int tcp_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
/* __ Restore normal policy in scheduler __ */
if ((chunk = len - tp->ucopy.len) != 0) {
- NET_ADD_STATS_USER(LINUX_MIB_TCPDIRECTCOPYFROMBACKLOG, chunk);
+ NET_ADD_STATS_USER(sock_net(sk), LINUX_MIB_TCPDIRECTCOPYFROMBACKLOG, chunk);
len -= chunk;
copied += chunk;
}
@@ -1486,7 +1484,7 @@ do_prequeue:
tcp_prequeue_process(sk);
if ((chunk = len - tp->ucopy.len) != 0) {
- NET_ADD_STATS_USER(LINUX_MIB_TCPDIRECTCOPYFROMPREQUEUE, chunk);
+ NET_ADD_STATS_USER(sock_net(sk), LINUX_MIB_TCPDIRECTCOPYFROMPREQUEUE, chunk);
len -= chunk;
copied += chunk;
}
@@ -1601,7 +1599,7 @@ skip_copy:
tcp_prequeue_process(sk);
if (copied > 0 && (chunk = len - tp->ucopy.len) != 0) {
- NET_ADD_STATS_USER(LINUX_MIB_TCPDIRECTCOPYFROMPREQUEUE, chunk);
+ NET_ADD_STATS_USER(sock_net(sk), LINUX_MIB_TCPDIRECTCOPYFROMPREQUEUE, chunk);
len -= chunk;
copied += chunk;
}
@@ -1668,12 +1666,12 @@ void tcp_set_state(struct sock *sk, int state)
switch (state) {
case TCP_ESTABLISHED:
if (oldstate != TCP_ESTABLISHED)
- TCP_INC_STATS(TCP_MIB_CURRESTAB);
+ TCP_INC_STATS(sock_net(sk), TCP_MIB_CURRESTAB);
break;
case TCP_CLOSE:
if (oldstate == TCP_CLOSE_WAIT || oldstate == TCP_ESTABLISHED)
- TCP_INC_STATS(TCP_MIB_ESTABRESETS);
+ TCP_INC_STATS(sock_net(sk), TCP_MIB_ESTABRESETS);
sk->sk_prot->unhash(sk);
if (inet_csk(sk)->icsk_bind_hash &&
@@ -1682,7 +1680,7 @@ void tcp_set_state(struct sock *sk, int state)
/* fall through */
default:
if (oldstate==TCP_ESTABLISHED)
- TCP_DEC_STATS(TCP_MIB_CURRESTAB);
+ TCP_DEC_STATS(sock_net(sk), TCP_MIB_CURRESTAB);
}
/* Change state AFTER socket is unhashed to avoid closed
@@ -1793,13 +1791,13 @@ void tcp_close(struct sock *sk, long timeout)
*/
if (data_was_unread) {
/* Unread data was tossed, zap the connection. */
- NET_INC_STATS_USER(LINUX_MIB_TCPABORTONCLOSE);
+ NET_INC_STATS_USER(sock_net(sk), LINUX_MIB_TCPABORTONCLOSE);
tcp_set_state(sk, TCP_CLOSE);
tcp_send_active_reset(sk, GFP_KERNEL);
} else if (sock_flag(sk, SOCK_LINGER) && !sk->sk_lingertime) {
/* Check zero linger _after_ checking for unread data. */
sk->sk_prot->disconnect(sk, 0);
- NET_INC_STATS_USER(LINUX_MIB_TCPABORTONDATA);
+ NET_INC_STATS_USER(sock_net(sk), LINUX_MIB_TCPABORTONDATA);
} else if (tcp_close_state(sk)) {
/* We FIN if the application ate all the data before
* zapping the connection.
@@ -1871,7 +1869,8 @@ adjudge_to_death:
if (tp->linger2 < 0) {
tcp_set_state(sk, TCP_CLOSE);
tcp_send_active_reset(sk, GFP_ATOMIC);
- NET_INC_STATS_BH(LINUX_MIB_TCPABORTONLINGER);
+ NET_INC_STATS_BH(sock_net(sk),
+ LINUX_MIB_TCPABORTONLINGER);
} else {
const int tmo = tcp_fin_time(sk);
@@ -1893,7 +1892,8 @@ adjudge_to_death:
"sockets\n");
tcp_set_state(sk, TCP_CLOSE);
tcp_send_active_reset(sk, GFP_ATOMIC);
- NET_INC_STATS_BH(LINUX_MIB_TCPABORTONMEMORY);
+ NET_INC_STATS_BH(sock_net(sk),
+ LINUX_MIB_TCPABORTONMEMORY);
}
}
@@ -2588,12 +2588,69 @@ void __tcp_put_md5sig_pool(void)
}
EXPORT_SYMBOL(__tcp_put_md5sig_pool);
+
+/*
+ * Feed the fixed TCP header into the MD5 hash in progress in @hp.
+ *
+ * The checksum field is hashed as zero: it is cleared for the duration
+ * of the hash update and put back before returning.  Exactly
+ * sizeof(struct tcphdr) bytes are hashed, so TCP options are excluded.
+ *
+ * Returns the result of crypto_hash_update().
+ */
+int tcp_md5_hash_header(struct tcp_md5sig_pool *hp,
+			struct tcphdr *th)
+{
+	const __sum16 saved_check = th->check;
+	struct scatterlist sg;
+	int ret;
+
+	/* the header is hashed with its checksum field reading as zero */
+	th->check = 0;
+	sg_init_one(&sg, th, sizeof(*th));
+	ret = crypto_hash_update(&hp->md5_desc, &sg, sizeof(*th));
+	th->check = saved_check;
+
+	return ret;
+}
+
+EXPORT_SYMBOL(tcp_md5_hash_header);
+
+/*
+ * Feed the data of @skb that follows the first @header_len bytes of the
+ * TCP header into the MD5 hash in progress in @hp.
+ *
+ * The linear area is hashed first (zero bytes of it when the linear
+ * area is not longer than @header_len), then every page fragment in
+ * order.
+ *
+ * Returns 0 on success, 1 as soon as any crypto_hash_update() call
+ * reports a failure.
+ */
+int tcp_md5_hash_skb_data(struct tcp_md5sig_pool *hp,
+			  struct sk_buff *skb, unsigned header_len)
+{
+	const struct skb_shared_info *shinfo = skb_shinfo(skb);
+	struct hash_desc *desc = &hp->md5_desc;
+	const struct tcphdr *th = tcp_hdr(skb);
+	struct scatterlist sg;
+	unsigned linear_len = 0;
+	unsigned n;
+
+	if (skb_headlen(skb) > header_len)
+		linear_len = skb_headlen(skb) - header_len;
+
+	sg_init_table(&sg, 1);
+
+	/* linear part, starting right after the header bytes */
+	sg_set_buf(&sg, ((u8 *) th) + header_len, linear_len);
+	if (crypto_hash_update(desc, &sg, linear_len) != 0)
+		return 1;
+
+	/* paged part, one fragment at a time through the same sg entry */
+	for (n = 0; n < shinfo->nr_frags; n++) {
+		const struct skb_frag_struct *frag = &shinfo->frags[n];
+
+		sg_set_page(&sg, frag->page, frag->size, frag->page_offset);
+		if (crypto_hash_update(desc, &sg, frag->size) != 0)
+			return 1;
+	}
+
+	return 0;
+}
+
+EXPORT_SYMBOL(tcp_md5_hash_skb_data);
+
+/*
+ * Feed the key->keylen bytes stored at key->key into the MD5 hash in
+ * progress in @hp.
+ *
+ * Returns the result of crypto_hash_update().
+ */
+int tcp_md5_hash_key(struct tcp_md5sig_pool *hp, struct tcp_md5sig_key *key)
+{
+	struct scatterlist sg;
+	int err;
+
+	sg_init_one(&sg, key->key, key->keylen);
+	err = crypto_hash_update(&hp->md5_desc, &sg, key->keylen);
+
+	return err;
+}
+
+EXPORT_SYMBOL(tcp_md5_hash_key);
+
#endif
void tcp_done(struct sock *sk)
{
if(sk->sk_state == TCP_SYN_SENT || sk->sk_state == TCP_SYN_RECV)
- TCP_INC_STATS_BH(TCP_MIB_ATTEMPTFAILS);
+ TCP_INC_STATS_BH(sock_net(sk), TCP_MIB_ATTEMPTFAILS);
tcp_set_state(sk, TCP_CLOSE);
tcp_clear_xmit_timers(sk);
@@ -2730,4 +2787,3 @@ EXPORT_SYMBOL(tcp_splice_read);
EXPORT_SYMBOL(tcp_sendpage);
EXPORT_SYMBOL(tcp_setsockopt);
EXPORT_SYMBOL(tcp_shutdown);
-EXPORT_SYMBOL(tcp_statistics);
diff --git a/net/ipv4/tcp_diag.c b/net/ipv4/tcp_diag.c
index 2fbcc7d1b1a0..838d491dfda7 100644
--- a/net/ipv4/tcp_diag.c
+++ b/net/ipv4/tcp_diag.c
@@ -1,8 +1,6 @@
/*
* tcp_diag.c Module for monitoring TCP transport protocols sockets.
*
- * Version: $Id: tcp_diag.c,v 1.3 2002/02/01 22:01:04 davem Exp $
- *
* Authors: Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
*
* This program is free software; you can redistribute it and/or
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index cad73b7dfef0..1f5e6049883e 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -5,8 +5,6 @@
*
* Implementation of the Transmission Control Protocol(TCP).
*
- * Version: $Id: tcp_input.c,v 1.243 2002/02/01 22:01:04 davem Exp $
- *
* Authors: Ross Biro
* Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
* Mark Evans, <evansmp@uhura.aston.ac.uk>
@@ -604,7 +602,7 @@ static u32 tcp_rto_min(struct sock *sk)
u32 rto_min = TCP_RTO_MIN;
if (dst && dst_metric_locked(dst, RTAX_RTO_MIN))
- rto_min = dst_metric(dst, RTAX_RTO_MIN);
+ rto_min = dst_metric_rtt(dst, RTAX_RTO_MIN);
return rto_min;
}
@@ -731,6 +729,7 @@ void tcp_update_metrics(struct sock *sk)
if (dst && (dst->flags & DST_HOST)) {
const struct inet_connection_sock *icsk = inet_csk(sk);
int m;
+ unsigned long rtt;
if (icsk->icsk_backoff || !tp->srtt) {
/* This session failed to estimate rtt. Why?
@@ -742,7 +741,8 @@ void tcp_update_metrics(struct sock *sk)
return;
}
- m = dst_metric(dst, RTAX_RTT) - tp->srtt;
+ rtt = dst_metric_rtt(dst, RTAX_RTT);
+ m = rtt - tp->srtt;
/* If newly calculated rtt larger than stored one,
* store new one. Otherwise, use EWMA. Remember,
@@ -750,12 +750,13 @@ void tcp_update_metrics(struct sock *sk)
*/
if (!(dst_metric_locked(dst, RTAX_RTT))) {
if (m <= 0)
- dst->metrics[RTAX_RTT - 1] = tp->srtt;
+ set_dst_metric_rtt(dst, RTAX_RTT, tp->srtt);
else
- dst->metrics[RTAX_RTT - 1] -= (m >> 3);
+ set_dst_metric_rtt(dst, RTAX_RTT, rtt - (m >> 3));
}
if (!(dst_metric_locked(dst, RTAX_RTTVAR))) {
+ unsigned long var;
if (m < 0)
m = -m;
@@ -764,11 +765,13 @@ void tcp_update_metrics(struct sock *sk)
if (m < tp->mdev)
m = tp->mdev;
- if (m >= dst_metric(dst, RTAX_RTTVAR))
- dst->metrics[RTAX_RTTVAR - 1] = m;
+ var = dst_metric_rtt(dst, RTAX_RTTVAR);
+ if (m >= var)
+ var = m;
else
- dst->metrics[RTAX_RTTVAR-1] -=
- (dst_metric(dst, RTAX_RTTVAR) - m)>>2;
+ var -= (var - m) >> 2;
+
+ set_dst_metric_rtt(dst, RTAX_RTTVAR, var);
}
if (tp->snd_ssthresh >= 0xFFFF) {
@@ -899,7 +902,7 @@ static void tcp_init_metrics(struct sock *sk)
if (dst_metric(dst, RTAX_RTT) == 0)
goto reset;
- if (!tp->srtt && dst_metric(dst, RTAX_RTT) < (TCP_TIMEOUT_INIT << 3))
+ if (!tp->srtt && dst_metric_rtt(dst, RTAX_RTT) < (TCP_TIMEOUT_INIT << 3))
goto reset;
/* Initial rtt is determined from SYN,SYN-ACK.
@@ -916,12 +919,12 @@ static void tcp_init_metrics(struct sock *sk)
* to low value, and then abruptly stops to do it and starts to delay
* ACKs, wait for troubles.
*/
- if (dst_metric(dst, RTAX_RTT) > tp->srtt) {
- tp->srtt = dst_metric(dst, RTAX_RTT);
+ if (dst_metric_rtt(dst, RTAX_RTT) > tp->srtt) {
+ tp->srtt = dst_metric_rtt(dst, RTAX_RTT);
tp->rtt_seq = tp->snd_nxt;
}
- if (dst_metric(dst, RTAX_RTTVAR) > tp->mdev) {
- tp->mdev = dst_metric(dst, RTAX_RTTVAR);
+ if (dst_metric_rtt(dst, RTAX_RTTVAR) > tp->mdev) {
+ tp->mdev = dst_metric_rtt(dst, RTAX_RTTVAR);
tp->mdev_max = tp->rttvar = max(tp->mdev, tcp_rto_min(sk));
}
tcp_set_rto(sk);
@@ -949,17 +952,21 @@ static void tcp_update_reordering(struct sock *sk, const int metric,
{
struct tcp_sock *tp = tcp_sk(sk);
if (metric > tp->reordering) {
+ int mib_idx;
+
tp->reordering = min(TCP_MAX_REORDERING, metric);
/* This exciting event is worth to be remembered. 8) */
if (ts)
- NET_INC_STATS_BH(LINUX_MIB_TCPTSREORDER);
+ mib_idx = LINUX_MIB_TCPTSREORDER;
else if (tcp_is_reno(tp))
- NET_INC_STATS_BH(LINUX_MIB_TCPRENOREORDER);
+ mib_idx = LINUX_MIB_TCPRENOREORDER;
else if (tcp_is_fack(tp))
- NET_INC_STATS_BH(LINUX_MIB_TCPFACKREORDER);
+ mib_idx = LINUX_MIB_TCPFACKREORDER;
else
- NET_INC_STATS_BH(LINUX_MIB_TCPSACKREORDER);
+ mib_idx = LINUX_MIB_TCPSACKREORDER;
+
+ NET_INC_STATS_BH(sock_net(sk), mib_idx);
#if FASTRETRANS_DEBUG > 1
printk(KERN_DEBUG "Disorder%d %d %u f%u s%u rr%d\n",
tp->rx_opt.sack_ok, inet_csk(sk)->icsk_ca_state,
@@ -1155,7 +1162,7 @@ static void tcp_mark_lost_retrans(struct sock *sk)
tp->lost_out += tcp_skb_pcount(skb);
TCP_SKB_CB(skb)->sacked |= TCPCB_LOST;
}
- NET_INC_STATS_BH(LINUX_MIB_TCPLOSTRETRANSMIT);
+ NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPLOSTRETRANSMIT);
} else {
if (before(ack_seq, new_low_seq))
new_low_seq = ack_seq;
@@ -1167,10 +1174,11 @@ static void tcp_mark_lost_retrans(struct sock *sk)
tp->lost_retrans_low = new_low_seq;
}
-static int tcp_check_dsack(struct tcp_sock *tp, struct sk_buff *ack_skb,
+static int tcp_check_dsack(struct sock *sk, struct sk_buff *ack_skb,
struct tcp_sack_block_wire *sp, int num_sacks,
u32 prior_snd_una)
{
+ struct tcp_sock *tp = tcp_sk(sk);
u32 start_seq_0 = get_unaligned_be32(&sp[0].start_seq);
u32 end_seq_0 = get_unaligned_be32(&sp[0].end_seq);
int dup_sack = 0;
@@ -1178,7 +1186,7 @@ static int tcp_check_dsack(struct tcp_sock *tp, struct sk_buff *ack_skb,
if (before(start_seq_0, TCP_SKB_CB(ack_skb)->ack_seq)) {
dup_sack = 1;
tcp_dsack_seen(tp);
- NET_INC_STATS_BH(LINUX_MIB_TCPDSACKRECV);
+ NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPDSACKRECV);
} else if (num_sacks > 1) {
u32 end_seq_1 = get_unaligned_be32(&sp[1].end_seq);
u32 start_seq_1 = get_unaligned_be32(&sp[1].start_seq);
@@ -1187,7 +1195,8 @@ static int tcp_check_dsack(struct tcp_sock *tp, struct sk_buff *ack_skb,
!before(start_seq_0, start_seq_1)) {
dup_sack = 1;
tcp_dsack_seen(tp);
- NET_INC_STATS_BH(LINUX_MIB_TCPDSACKOFORECV);
+ NET_INC_STATS_BH(sock_net(sk),
+ LINUX_MIB_TCPDSACKOFORECV);
}
}
@@ -1414,10 +1423,10 @@ tcp_sacktag_write_queue(struct sock *sk, struct sk_buff *ack_skb,
unsigned char *ptr = (skb_transport_header(ack_skb) +
TCP_SKB_CB(ack_skb)->sacked);
struct tcp_sack_block_wire *sp_wire = (struct tcp_sack_block_wire *)(ptr+2);
- struct tcp_sack_block sp[4];
+ struct tcp_sack_block sp[TCP_NUM_SACKS];
struct tcp_sack_block *cache;
struct sk_buff *skb;
- int num_sacks = (ptr[1] - TCPOLEN_SACK_BASE) >> 3;
+ int num_sacks = min(TCP_NUM_SACKS, (ptr[1] - TCPOLEN_SACK_BASE) >> 3);
int used_sacks;
int reord = tp->packets_out;
int flag = 0;
@@ -1432,7 +1441,7 @@ tcp_sacktag_write_queue(struct sock *sk, struct sk_buff *ack_skb,
tcp_highest_sack_reset(sk);
}
- found_dup_sack = tcp_check_dsack(tp, ack_skb, sp_wire,
+ found_dup_sack = tcp_check_dsack(sk, ack_skb, sp_wire,
num_sacks, prior_snd_una);
if (found_dup_sack)
flag |= FLAG_DSACKING_ACK;
@@ -1458,18 +1467,22 @@ tcp_sacktag_write_queue(struct sock *sk, struct sk_buff *ack_skb,
if (!tcp_is_sackblock_valid(tp, dup_sack,
sp[used_sacks].start_seq,
sp[used_sacks].end_seq)) {
+ int mib_idx;
+
if (dup_sack) {
if (!tp->undo_marker)
- NET_INC_STATS_BH(LINUX_MIB_TCPDSACKIGNOREDNOUNDO);
+ mib_idx = LINUX_MIB_TCPDSACKIGNOREDNOUNDO;
else
- NET_INC_STATS_BH(LINUX_MIB_TCPDSACKIGNOREDOLD);
+ mib_idx = LINUX_MIB_TCPDSACKIGNOREDOLD;
} else {
/* Don't count olds caused by ACK reordering */
if ((TCP_SKB_CB(ack_skb)->ack_seq != tp->snd_una) &&
!after(sp[used_sacks].end_seq, tp->snd_una))
continue;
- NET_INC_STATS_BH(LINUX_MIB_TCPSACKDISCARD);
+ mib_idx = LINUX_MIB_TCPSACKDISCARD;
}
+
+ NET_INC_STATS_BH(sock_net(sk), mib_idx);
if (i == 0)
first_sack_index = -1;
continue;
@@ -1962,7 +1975,7 @@ static int tcp_check_sack_reneging(struct sock *sk, int flag)
{
if (flag & FLAG_SACK_RENEGING) {
struct inet_connection_sock *icsk = inet_csk(sk);
- NET_INC_STATS_BH(LINUX_MIB_TCPSACKRENEGING);
+ NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPSACKRENEGING);
tcp_enter_loss(sk, 1);
icsk->icsk_retransmits++;
@@ -2382,15 +2395,19 @@ static int tcp_try_undo_recovery(struct sock *sk)
struct tcp_sock *tp = tcp_sk(sk);
if (tcp_may_undo(tp)) {
+ int mib_idx;
+
/* Happy end! We did not retransmit anything
* or our original transmission succeeded.
*/
DBGUNDO(sk, inet_csk(sk)->icsk_ca_state == TCP_CA_Loss ? "loss" : "retrans");
tcp_undo_cwr(sk, 1);
if (inet_csk(sk)->icsk_ca_state == TCP_CA_Loss)
- NET_INC_STATS_BH(LINUX_MIB_TCPLOSSUNDO);
+ mib_idx = LINUX_MIB_TCPLOSSUNDO;
else
- NET_INC_STATS_BH(LINUX_MIB_TCPFULLUNDO);
+ mib_idx = LINUX_MIB_TCPFULLUNDO;
+
+ NET_INC_STATS_BH(sock_net(sk), mib_idx);
tp->undo_marker = 0;
}
if (tp->snd_una == tp->high_seq && tcp_is_reno(tp)) {
@@ -2413,7 +2430,7 @@ static void tcp_try_undo_dsack(struct sock *sk)
DBGUNDO(sk, "D-SACK");
tcp_undo_cwr(sk, 1);
tp->undo_marker = 0;
- NET_INC_STATS_BH(LINUX_MIB_TCPDSACKUNDO);
+ NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPDSACKUNDO);
}
}
@@ -2436,7 +2453,7 @@ static int tcp_try_undo_partial(struct sock *sk, int acked)
DBGUNDO(sk, "Hoe");
tcp_undo_cwr(sk, 0);
- NET_INC_STATS_BH(LINUX_MIB_TCPPARTIALUNDO);
+ NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPPARTIALUNDO);
/* So... Do not make Hoe's retransmit yet.
* If the first packet was delayed, the rest
@@ -2465,7 +2482,7 @@ static int tcp_try_undo_loss(struct sock *sk)
DBGUNDO(sk, "partial loss");
tp->lost_out = 0;
tcp_undo_cwr(sk, 1);
- NET_INC_STATS_BH(LINUX_MIB_TCPLOSSUNDO);
+ NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPLOSSUNDO);
inet_csk(sk)->icsk_retransmits = 0;
tp->undo_marker = 0;
if (tcp_is_sack(tp))
@@ -2562,7 +2579,7 @@ static void tcp_fastretrans_alert(struct sock *sk, int pkts_acked, int flag)
int is_dupack = !(flag & (FLAG_SND_UNA_ADVANCED | FLAG_NOT_DUP));
int do_lost = is_dupack || ((flag & FLAG_DATA_SACKED) &&
(tcp_fackets_out(tp) > tp->reordering));
- int fast_rexmit = 0;
+ int fast_rexmit = 0, mib_idx;
if (WARN_ON(!tp->packets_out && tp->sacked_out))
tp->sacked_out = 0;
@@ -2584,7 +2601,7 @@ static void tcp_fastretrans_alert(struct sock *sk, int pkts_acked, int flag)
icsk->icsk_ca_state != TCP_CA_Open &&
tp->fackets_out > tp->reordering) {
tcp_mark_head_lost(sk, tp->fackets_out - tp->reordering);
- NET_INC_STATS_BH(LINUX_MIB_TCPLOSS);
+ NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPLOSS);
}
/* D. Check consistency of the current state. */
@@ -2685,9 +2702,11 @@ static void tcp_fastretrans_alert(struct sock *sk, int pkts_acked, int flag)
/* Otherwise enter Recovery state */
if (tcp_is_reno(tp))
- NET_INC_STATS_BH(LINUX_MIB_TCPRENORECOVERY);
+ mib_idx = LINUX_MIB_TCPRENORECOVERY;
else
- NET_INC_STATS_BH(LINUX_MIB_TCPSACKRECOVERY);
+ mib_idx = LINUX_MIB_TCPSACKRECOVERY;
+
+ NET_INC_STATS_BH(sock_net(sk), mib_idx);
tp->high_seq = tp->snd_nxt;
tp->prior_ssthresh = 0;
@@ -3198,7 +3217,7 @@ static int tcp_process_frto(struct sock *sk, int flag)
}
tp->frto_counter = 0;
tp->undo_marker = 0;
- NET_INC_STATS_BH(LINUX_MIB_TCPSPURIOUSRTOS);
+ NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPSPURIOUSRTOS);
}
return 0;
}
@@ -3251,12 +3270,12 @@ static int tcp_ack(struct sock *sk, struct sk_buff *skb, int flag)
tcp_ca_event(sk, CA_EVENT_FAST_ACK);
- NET_INC_STATS_BH(LINUX_MIB_TCPHPACKS);
+ NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPHPACKS);
} else {
if (ack_seq != TCP_SKB_CB(skb)->end_seq)
flag |= FLAG_DATA;
else
- NET_INC_STATS_BH(LINUX_MIB_TCPPUREACKS);
+ NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPPUREACKS);
flag |= tcp_ack_update_window(sk, skb, ack, ack_seq);
@@ -3450,6 +3469,43 @@ static int tcp_fast_parse_options(struct sk_buff *skb, struct tcphdr *th,
return 1;
}
+#ifdef CONFIG_TCP_MD5SIG
+/*
+ * Locate the TCP MD5 Signature option (RFC 2385) in a TCP header.
+ *
+ * Walks the option bytes that follow the fixed header; the amount of
+ * option space is derived from th->doff.
+ *
+ * Returns a pointer to the first byte after the option's kind/length
+ * pair (the start of the digest), or NULL when the option is absent,
+ * the option list is malformed, or the option's length byte is not
+ * exactly TCPOLEN_MD5SIG.
+ *
+ * The exact-length check matters: the returned pointer carries no
+ * length, so an undersized option must not be reported as a match
+ * when fewer digest bytes than a full option holds lie behind it.
+ */
+u8 *tcp_parse_md5sig_option(struct tcphdr *th)
+{
+	int length = (th->doff << 2) - sizeof (*th);
+	u8 *ptr = (u8*)(th + 1);
+
+	/*
+	 * Stop once a complete MD5 option can no longer fit in what is
+	 * left.  This also guarantees the length byte read below is
+	 * still inside the option area.
+	 */
+	while (length >= TCPOLEN_MD5SIG) {
+		int opcode = *ptr++;
+		int opsize;
+
+		switch(opcode) {
+		case TCPOPT_EOL:
+			return NULL;
+		case TCPOPT_NOP:
+			/* single-byte option: no length byte follows */
+			length--;
+			continue;
+		default:
+			opsize = *ptr++;
+			if (opsize < 2 || opsize > length)
+				return NULL;
+			if (opcode == TCPOPT_MD5SIG)
+				return opsize == TCPOLEN_MD5SIG ? ptr : NULL;
+		}
+		/* skip the payload; kind and length were consumed above */
+		ptr += opsize - 2;
+		length -= opsize;
+	}
+	return NULL;
+}
+#endif
+
static inline void tcp_store_ts_recent(struct tcp_sock *tp)
{
tp->rx_opt.ts_recent = tp->rx_opt.rcv_tsval;
@@ -3662,26 +3718,33 @@ static inline int tcp_sack_extend(struct tcp_sack_block *sp, u32 seq,
return 0;
}
-static void tcp_dsack_set(struct tcp_sock *tp, u32 seq, u32 end_seq)
+static void tcp_dsack_set(struct sock *sk, u32 seq, u32 end_seq)
{
+ struct tcp_sock *tp = tcp_sk(sk);
+
if (tcp_is_sack(tp) && sysctl_tcp_dsack) {
+ int mib_idx;
+
if (before(seq, tp->rcv_nxt))
- NET_INC_STATS_BH(LINUX_MIB_TCPDSACKOLDSENT);
+ mib_idx = LINUX_MIB_TCPDSACKOLDSENT;
else
- NET_INC_STATS_BH(LINUX_MIB_TCPDSACKOFOSENT);
+ mib_idx = LINUX_MIB_TCPDSACKOFOSENT;
+
+ NET_INC_STATS_BH(sock_net(sk), mib_idx);
tp->rx_opt.dsack = 1;
tp->duplicate_sack[0].start_seq = seq;
tp->duplicate_sack[0].end_seq = end_seq;
- tp->rx_opt.eff_sacks = min(tp->rx_opt.num_sacks + 1,
- 4 - tp->rx_opt.tstamp_ok);
+ tp->rx_opt.eff_sacks = tp->rx_opt.num_sacks + 1;
}
}
-static void tcp_dsack_extend(struct tcp_sock *tp, u32 seq, u32 end_seq)
+static void tcp_dsack_extend(struct sock *sk, u32 seq, u32 end_seq)
{
+ struct tcp_sock *tp = tcp_sk(sk);
+
if (!tp->rx_opt.dsack)
- tcp_dsack_set(tp, seq, end_seq);
+ tcp_dsack_set(sk, seq, end_seq);
else
tcp_sack_extend(tp->duplicate_sack, seq, end_seq);
}
@@ -3692,7 +3755,7 @@ static void tcp_send_dupack(struct sock *sk, struct sk_buff *skb)
if (TCP_SKB_CB(skb)->end_seq != TCP_SKB_CB(skb)->seq &&
before(TCP_SKB_CB(skb)->seq, tp->rcv_nxt)) {
- NET_INC_STATS_BH(LINUX_MIB_DELAYEDACKLOST);
+ NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_DELAYEDACKLOST);
tcp_enter_quickack_mode(sk);
if (tcp_is_sack(tp) && sysctl_tcp_dsack) {
@@ -3700,7 +3763,7 @@ static void tcp_send_dupack(struct sock *sk, struct sk_buff *skb)
if (after(TCP_SKB_CB(skb)->end_seq, tp->rcv_nxt))
end_seq = tp->rcv_nxt;
- tcp_dsack_set(tp, TCP_SKB_CB(skb)->seq, end_seq);
+ tcp_dsack_set(sk, TCP_SKB_CB(skb)->seq, end_seq);
}
}
@@ -3727,9 +3790,8 @@ static void tcp_sack_maybe_coalesce(struct tcp_sock *tp)
* Decrease num_sacks.
*/
tp->rx_opt.num_sacks--;
- tp->rx_opt.eff_sacks = min(tp->rx_opt.num_sacks +
- tp->rx_opt.dsack,
- 4 - tp->rx_opt.tstamp_ok);
+ tp->rx_opt.eff_sacks = tp->rx_opt.num_sacks +
+ tp->rx_opt.dsack;
for (i = this_sack; i < tp->rx_opt.num_sacks; i++)
sp[i] = sp[i + 1];
continue;
@@ -3779,7 +3841,7 @@ static void tcp_sack_new_ofo_skb(struct sock *sk, u32 seq, u32 end_seq)
*
* If the sack array is full, forget about the last one.
*/
- if (this_sack >= 4) {
+ if (this_sack >= TCP_NUM_SACKS) {
this_sack--;
tp->rx_opt.num_sacks--;
sp--;
@@ -3792,8 +3854,7 @@ new_sack:
sp->start_seq = seq;
sp->end_seq = end_seq;
tp->rx_opt.num_sacks++;
- tp->rx_opt.eff_sacks = min(tp->rx_opt.num_sacks + tp->rx_opt.dsack,
- 4 - tp->rx_opt.tstamp_ok);
+ tp->rx_opt.eff_sacks = tp->rx_opt.num_sacks + tp->rx_opt.dsack;
}
/* RCV.NXT advances, some SACKs should be eaten. */
@@ -3830,9 +3891,8 @@ static void tcp_sack_remove(struct tcp_sock *tp)
}
if (num_sacks != tp->rx_opt.num_sacks) {
tp->rx_opt.num_sacks = num_sacks;
- tp->rx_opt.eff_sacks = min(tp->rx_opt.num_sacks +
- tp->rx_opt.dsack,
- 4 - tp->rx_opt.tstamp_ok);
+ tp->rx_opt.eff_sacks = tp->rx_opt.num_sacks +
+ tp->rx_opt.dsack;
}
}
@@ -3853,7 +3913,7 @@ static void tcp_ofo_queue(struct sock *sk)
__u32 dsack = dsack_high;
if (before(TCP_SKB_CB(skb)->end_seq, dsack_high))
dsack_high = TCP_SKB_CB(skb)->end_seq;
- tcp_dsack_extend(tp, TCP_SKB_CB(skb)->seq, dsack);
+ tcp_dsack_extend(sk, TCP_SKB_CB(skb)->seq, dsack);
}
if (!after(TCP_SKB_CB(skb)->end_seq, tp->rcv_nxt)) {
@@ -3911,8 +3971,7 @@ static void tcp_data_queue(struct sock *sk, struct sk_buff *skb)
if (tp->rx_opt.dsack) {
tp->rx_opt.dsack = 0;
- tp->rx_opt.eff_sacks = min_t(unsigned int, tp->rx_opt.num_sacks,
- 4 - tp->rx_opt.tstamp_ok);
+ tp->rx_opt.eff_sacks = tp->rx_opt.num_sacks;
}
/* Queue data for delivery to the user.
@@ -3981,8 +4040,8 @@ queue_and_out:
if (!after(TCP_SKB_CB(skb)->end_seq, tp->rcv_nxt)) {
/* A retransmit, 2nd most common case. Force an immediate ack. */
- NET_INC_STATS_BH(LINUX_MIB_DELAYEDACKLOST);
- tcp_dsack_set(tp, TCP_SKB_CB(skb)->seq, TCP_SKB_CB(skb)->end_seq);
+ NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_DELAYEDACKLOST);
+ tcp_dsack_set(sk, TCP_SKB_CB(skb)->seq, TCP_SKB_CB(skb)->end_seq);
out_of_window:
tcp_enter_quickack_mode(sk);
@@ -4004,7 +4063,7 @@ drop:
tp->rcv_nxt, TCP_SKB_CB(skb)->seq,
TCP_SKB_CB(skb)->end_seq);
- tcp_dsack_set(tp, TCP_SKB_CB(skb)->seq, tp->rcv_nxt);
+ tcp_dsack_set(sk, TCP_SKB_CB(skb)->seq, tp->rcv_nxt);
/* If window is closed, drop tail of packet. But after
* remembering D-SACK for its head made in previous line.
@@ -4069,12 +4128,12 @@ drop:
if (!after(end_seq, TCP_SKB_CB(skb1)->end_seq)) {
/* All the bits are present. Drop. */
__kfree_skb(skb);
- tcp_dsack_set(tp, seq, end_seq);
+ tcp_dsack_set(sk, seq, end_seq);
goto add_sack;
}
if (after(seq, TCP_SKB_CB(skb1)->seq)) {
/* Partial overlap. */
- tcp_dsack_set(tp, seq,
+ tcp_dsack_set(sk, seq,
TCP_SKB_CB(skb1)->end_seq);
} else {
skb1 = skb1->prev;
@@ -4087,12 +4146,12 @@ drop:
(struct sk_buff *)&tp->out_of_order_queue &&
after(end_seq, TCP_SKB_CB(skb1)->seq)) {
if (before(end_seq, TCP_SKB_CB(skb1)->end_seq)) {
- tcp_dsack_extend(tp, TCP_SKB_CB(skb1)->seq,
+ tcp_dsack_extend(sk, TCP_SKB_CB(skb1)->seq,
end_seq);
break;
}
__skb_unlink(skb1, &tp->out_of_order_queue);
- tcp_dsack_extend(tp, TCP_SKB_CB(skb1)->seq,
+ tcp_dsack_extend(sk, TCP_SKB_CB(skb1)->seq,
TCP_SKB_CB(skb1)->end_seq);
__kfree_skb(skb1);
}
@@ -4123,7 +4182,7 @@ tcp_collapse(struct sock *sk, struct sk_buff_head *list,
struct sk_buff *next = skb->next;
__skb_unlink(skb, list);
__kfree_skb(skb);
- NET_INC_STATS_BH(LINUX_MIB_TCPRCVCOLLAPSED);
+ NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPRCVCOLLAPSED);
skb = next;
continue;
}
@@ -4191,7 +4250,7 @@ tcp_collapse(struct sock *sk, struct sk_buff_head *list,
struct sk_buff *next = skb->next;
__skb_unlink(skb, list);
__kfree_skb(skb);
- NET_INC_STATS_BH(LINUX_MIB_TCPRCVCOLLAPSED);
+ NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPRCVCOLLAPSED);
skb = next;
if (skb == tail ||
tcp_hdr(skb)->syn ||
@@ -4254,7 +4313,7 @@ static int tcp_prune_ofo_queue(struct sock *sk)
int res = 0;
if (!skb_queue_empty(&tp->out_of_order_queue)) {
- NET_INC_STATS_BH(LINUX_MIB_OFOPRUNED);
+ NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_OFOPRUNED);
__skb_queue_purge(&tp->out_of_order_queue);
/* Reset SACK state. A conforming SACK implementation will
@@ -4283,7 +4342,7 @@ static int tcp_prune_queue(struct sock *sk)
SOCK_DEBUG(sk, "prune_queue: c=%x\n", tp->copied_seq);
- NET_INC_STATS_BH(LINUX_MIB_PRUNECALLED);
+ NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_PRUNECALLED);
if (atomic_read(&sk->sk_rmem_alloc) >= sk->sk_rcvbuf)
tcp_clamp_window(sk);
@@ -4312,7 +4371,7 @@ static int tcp_prune_queue(struct sock *sk)
* drop receive data on the floor. It will get retransmitted
* and hopefully then we'll have sufficient space.
*/
- NET_INC_STATS_BH(LINUX_MIB_RCVPRUNED);
+ NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_RCVPRUNED);
/* Massive buffer overcommit. */
tp->pred_flags = 0;
@@ -4742,7 +4801,7 @@ int tcp_rcv_established(struct sock *sk, struct sk_buff *skb,
tcp_data_snd_check(sk);
return 0;
} else { /* Header too small */
- TCP_INC_STATS_BH(TCP_MIB_INERRS);
+ TCP_INC_STATS_BH(sock_net(sk), TCP_MIB_INERRS);
goto discard;
}
} else {
@@ -4779,7 +4838,7 @@ int tcp_rcv_established(struct sock *sk, struct sk_buff *skb,
__skb_pull(skb, tcp_header_len);
tp->rcv_nxt = TCP_SKB_CB(skb)->end_seq;
- NET_INC_STATS_BH(LINUX_MIB_TCPHPHITSTOUSER);
+ NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPHPHITSTOUSER);
}
if (copied_early)
tcp_cleanup_rbuf(sk, skb->len);
@@ -4802,7 +4861,7 @@ int tcp_rcv_established(struct sock *sk, struct sk_buff *skb,
if ((int)skb->truesize > sk->sk_forward_alloc)
goto step5;
- NET_INC_STATS_BH(LINUX_MIB_TCPHPHITS);
+ NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPHPHITS);
/* Bulk data transfer: receiver */
__skb_pull(skb, tcp_header_len);
@@ -4846,7 +4905,7 @@ slow_path:
if (tcp_fast_parse_options(skb, th, tp) && tp->rx_opt.saw_tstamp &&
tcp_paws_discard(sk, skb)) {
if (!th->rst) {
- NET_INC_STATS_BH(LINUX_MIB_PAWSESTABREJECTED);
+ NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_PAWSESTABREJECTED);
tcp_send_dupack(sk, skb);
goto discard;
}
@@ -4881,8 +4940,8 @@ slow_path:
tcp_replace_ts_recent(tp, TCP_SKB_CB(skb)->seq);
if (th->syn && !before(TCP_SKB_CB(skb)->seq, tp->rcv_nxt)) {
- TCP_INC_STATS_BH(TCP_MIB_INERRS);
- NET_INC_STATS_BH(LINUX_MIB_TCPABORTONSYN);
+ TCP_INC_STATS_BH(sock_net(sk), TCP_MIB_INERRS);
+ NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPABORTONSYN);
tcp_reset(sk);
return 1;
}
@@ -4904,7 +4963,7 @@ step5:
return 0;
csum_error:
- TCP_INC_STATS_BH(TCP_MIB_INERRS);
+ TCP_INC_STATS_BH(sock_net(sk), TCP_MIB_INERRS);
discard:
__kfree_skb(skb);
@@ -4938,7 +4997,7 @@ static int tcp_rcv_synsent_state_process(struct sock *sk, struct sk_buff *skb,
if (tp->rx_opt.saw_tstamp && tp->rx_opt.rcv_tsecr &&
!between(tp->rx_opt.rcv_tsecr, tp->retrans_stamp,
tcp_time_stamp)) {
- NET_INC_STATS_BH(LINUX_MIB_PAWSACTIVEREJECTED);
+ NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_PAWSACTIVEREJECTED);
goto reset_and_undo;
}
@@ -5222,7 +5281,7 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb,
if (tcp_fast_parse_options(skb, th, tp) && tp->rx_opt.saw_tstamp &&
tcp_paws_discard(sk, skb)) {
if (!th->rst) {
- NET_INC_STATS_BH(LINUX_MIB_PAWSESTABREJECTED);
+ NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_PAWSESTABREJECTED);
tcp_send_dupack(sk, skb);
goto discard;
}
@@ -5251,7 +5310,7 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb,
* Check for a SYN in window.
*/
if (th->syn && !before(TCP_SKB_CB(skb)->seq, tp->rcv_nxt)) {
- NET_INC_STATS_BH(LINUX_MIB_TCPABORTONSYN);
+ NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPABORTONSYN);
tcp_reset(sk);
return 1;
}
@@ -5333,7 +5392,7 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb,
(TCP_SKB_CB(skb)->end_seq != TCP_SKB_CB(skb)->seq &&
after(TCP_SKB_CB(skb)->end_seq - th->fin, tp->rcv_nxt))) {
tcp_done(sk);
- NET_INC_STATS_BH(LINUX_MIB_TCPABORTONDATA);
+ NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPABORTONDATA);
return 1;
}
@@ -5393,7 +5452,7 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb,
if (sk->sk_shutdown & RCV_SHUTDOWN) {
if (TCP_SKB_CB(skb)->end_seq != TCP_SKB_CB(skb)->seq &&
after(TCP_SKB_CB(skb)->end_seq - th->fin, tp->rcv_nxt)) {
- NET_INC_STATS_BH(LINUX_MIB_TCPABORTONDATA);
+ NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPABORTONDATA);
tcp_reset(sk);
return 1;
}
@@ -5422,6 +5481,9 @@ EXPORT_SYMBOL(sysctl_tcp_ecn);
EXPORT_SYMBOL(sysctl_tcp_reordering);
EXPORT_SYMBOL(sysctl_tcp_adv_win_scale);
EXPORT_SYMBOL(tcp_parse_options);
+#ifdef CONFIG_TCP_MD5SIG
+EXPORT_SYMBOL(tcp_parse_md5sig_option);
+#endif
EXPORT_SYMBOL(tcp_rcv_established);
EXPORT_SYMBOL(tcp_rcv_state_process);
EXPORT_SYMBOL(tcp_initialize_rcv_mss);
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index ffe869ac1bcf..a82df6307567 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -5,8 +5,6 @@
*
* Implementation of the Transmission Control Protocol(TCP).
*
- * Version: $Id: tcp_ipv4.c,v 1.240 2002/02/01 22:01:04 davem Exp $
- *
* IPv4 specific functions
*
*
@@ -89,10 +87,14 @@ int sysctl_tcp_low_latency __read_mostly;
#ifdef CONFIG_TCP_MD5SIG
static struct tcp_md5sig_key *tcp_v4_md5_do_lookup(struct sock *sk,
__be32 addr);
-static int tcp_v4_do_calc_md5_hash(char *md5_hash, struct tcp_md5sig_key *key,
- __be32 saddr, __be32 daddr,
- struct tcphdr *th, int protocol,
- unsigned int tcplen);
+static int tcp_v4_md5_hash_hdr(char *md5_hash, struct tcp_md5sig_key *key,
+ __be32 daddr, __be32 saddr, struct tcphdr *th);
+#else
+/* Stub for builds without CONFIG_TCP_MD5SIG: the lookup always returns NULL. */
+static inline
+struct tcp_md5sig_key *tcp_v4_md5_do_lookup(struct sock *sk, __be32 addr)
+{
+ return NULL;
+}
#endif
struct inet_hashinfo __cacheline_aligned tcp_hashinfo = {
@@ -172,7 +174,7 @@ int tcp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
inet->sport, usin->sin_port, sk, 1);
if (tmp < 0) {
if (tmp == -ENETUNREACH)
- IP_INC_STATS_BH(IPSTATS_MIB_OUTNOROUTES);
+ IP_INC_STATS_BH(sock_net(sk), IPSTATS_MIB_OUTNOROUTES);
return tmp;
}
@@ -340,16 +342,17 @@ void tcp_v4_err(struct sk_buff *skb, u32 info)
struct sock *sk;
__u32 seq;
int err;
+ struct net *net = dev_net(skb->dev);
if (skb->len < (iph->ihl << 2) + 8) {
- ICMP_INC_STATS_BH(ICMP_MIB_INERRORS);
+ ICMP_INC_STATS_BH(net, ICMP_MIB_INERRORS);
return;
}
- sk = inet_lookup(dev_net(skb->dev), &tcp_hashinfo, iph->daddr, th->dest,
+ sk = inet_lookup(net, &tcp_hashinfo, iph->daddr, th->dest,
iph->saddr, th->source, inet_iif(skb));
if (!sk) {
- ICMP_INC_STATS_BH(ICMP_MIB_INERRORS);
+ ICMP_INC_STATS_BH(net, ICMP_MIB_INERRORS);
return;
}
if (sk->sk_state == TCP_TIME_WAIT) {
@@ -362,7 +365,7 @@ void tcp_v4_err(struct sk_buff *skb, u32 info)
* servers this needs to be solved differently.
*/
if (sock_owned_by_user(sk))
- NET_INC_STATS_BH(LINUX_MIB_LOCKDROPPEDICMPS);
+ NET_INC_STATS_BH(net, LINUX_MIB_LOCKDROPPEDICMPS);
if (sk->sk_state == TCP_CLOSE)
goto out;
@@ -371,7 +374,7 @@ void tcp_v4_err(struct sk_buff *skb, u32 info)
seq = ntohl(th->seq);
if (sk->sk_state != TCP_LISTEN &&
!between(seq, tp->snd_una, tp->snd_nxt)) {
- NET_INC_STATS_BH(LINUX_MIB_OUTOFWINDOWICMPS);
+ NET_INC_STATS_BH(net, LINUX_MIB_OUTOFWINDOWICMPS);
goto out;
}
@@ -418,7 +421,7 @@ void tcp_v4_err(struct sk_buff *skb, u32 info)
BUG_TRAP(!req->sk);
if (seq != tcp_rsk(req)->snt_isn) {
- NET_INC_STATS_BH(LINUX_MIB_OUTOFWINDOWICMPS);
+ NET_INC_STATS_BH(net, LINUX_MIB_OUTOFWINDOWICMPS);
goto out;
}
@@ -540,6 +543,7 @@ static void tcp_v4_send_reset(struct sock *sk, struct sk_buff *skb)
#ifdef CONFIG_TCP_MD5SIG
struct tcp_md5sig_key *key;
#endif
+ struct net *net;
/* Never send a reset in response to a reset. */
if (th->rst)
@@ -578,12 +582,9 @@ static void tcp_v4_send_reset(struct sock *sk, struct sk_buff *skb)
arg.iov[0].iov_len += TCPOLEN_MD5SIG_ALIGNED;
rep.th.doff = arg.iov[0].iov_len / 4;
- tcp_v4_do_calc_md5_hash((__u8 *)&rep.opt[1],
- key,
- ip_hdr(skb)->daddr,
- ip_hdr(skb)->saddr,
- &rep.th, IPPROTO_TCP,
- arg.iov[0].iov_len);
+ tcp_v4_md5_hash_hdr((__u8 *) &rep.opt[1],
+ key, ip_hdr(skb)->daddr,
+ ip_hdr(skb)->saddr, &rep.th);
}
#endif
arg.csum = csum_tcpudp_nofold(ip_hdr(skb)->daddr,
@@ -591,20 +592,21 @@ static void tcp_v4_send_reset(struct sock *sk, struct sk_buff *skb)
sizeof(struct tcphdr), IPPROTO_TCP, 0);
arg.csumoffset = offsetof(struct tcphdr, check) / 2;
- ip_send_reply(dev_net(skb->dst->dev)->ipv4.tcp_sock, skb,
+ net = dev_net(skb->dst->dev);
+ ip_send_reply(net->ipv4.tcp_sock, skb,
&arg, arg.iov[0].iov_len);
- TCP_INC_STATS_BH(TCP_MIB_OUTSEGS);
- TCP_INC_STATS_BH(TCP_MIB_OUTRSTS);
+ TCP_INC_STATS_BH(net, TCP_MIB_OUTSEGS);
+ TCP_INC_STATS_BH(net, TCP_MIB_OUTRSTS);
}
/* The code following below sending ACKs in SYN-RECV and TIME-WAIT states
outside socket context is ugly, certainly. What can I do?
*/
-static void tcp_v4_send_ack(struct tcp_timewait_sock *twsk,
- struct sk_buff *skb, u32 seq, u32 ack,
- u32 win, u32 ts)
+static void tcp_v4_send_ack(struct sk_buff *skb, u32 seq, u32 ack,
+ u32 win, u32 ts, int oif,
+ struct tcp_md5sig_key *key)
{
struct tcphdr *th = tcp_hdr(skb);
struct {
@@ -616,10 +618,7 @@ static void tcp_v4_send_ack(struct tcp_timewait_sock *twsk,
];
} rep;
struct ip_reply_arg arg;
-#ifdef CONFIG_TCP_MD5SIG
- struct tcp_md5sig_key *key;
- struct tcp_md5sig_key tw_key;
-#endif
+ struct net *net = dev_net(skb->dev);
memset(&rep.th, 0, sizeof(struct tcphdr));
memset(&arg, 0, sizeof(arg));
@@ -645,23 +644,6 @@ static void tcp_v4_send_ack(struct tcp_timewait_sock *twsk,
rep.th.window = htons(win);
#ifdef CONFIG_TCP_MD5SIG
- /*
- * The SKB holds an imcoming packet, but may not have a valid ->sk
- * pointer. This is especially the case when we're dealing with a
- * TIME_WAIT ack, because the sk structure is long gone, and only
- * the tcp_timewait_sock remains. So the md5 key is stashed in that
- * structure, and we use it in preference. I believe that (twsk ||
- * skb->sk) holds true, but we program defensively.
- */
- if (!twsk && skb->sk) {
- key = tcp_v4_md5_do_lookup(skb->sk, ip_hdr(skb)->daddr);
- } else if (twsk && twsk->tw_md5_keylen) {
- tw_key.key = twsk->tw_md5_key;
- tw_key.keylen = twsk->tw_md5_keylen;
- key = &tw_key;
- } else
- key = NULL;
-
if (key) {
int offset = (ts) ? 3 : 0;
@@ -672,25 +654,22 @@ static void tcp_v4_send_ack(struct tcp_timewait_sock *twsk,
arg.iov[0].iov_len += TCPOLEN_MD5SIG_ALIGNED;
rep.th.doff = arg.iov[0].iov_len/4;
- tcp_v4_do_calc_md5_hash((__u8 *)&rep.opt[offset],
- key,
- ip_hdr(skb)->daddr,
- ip_hdr(skb)->saddr,
- &rep.th, IPPROTO_TCP,
- arg.iov[0].iov_len);
+ tcp_v4_md5_hash_hdr((__u8 *) &rep.opt[offset],
+ key, ip_hdr(skb)->daddr,
+ ip_hdr(skb)->saddr, &rep.th);
}
#endif
arg.csum = csum_tcpudp_nofold(ip_hdr(skb)->daddr,
ip_hdr(skb)->saddr, /* XXX */
arg.iov[0].iov_len, IPPROTO_TCP, 0);
arg.csumoffset = offsetof(struct tcphdr, check) / 2;
- if (twsk)
- arg.bound_dev_if = twsk->tw_sk.tw_bound_dev_if;
+ if (oif)
+ arg.bound_dev_if = oif;
- ip_send_reply(dev_net(skb->dev)->ipv4.tcp_sock, skb,
+ ip_send_reply(net->ipv4.tcp_sock, skb,
&arg, arg.iov[0].iov_len);
- TCP_INC_STATS_BH(TCP_MIB_OUTSEGS);
+ TCP_INC_STATS_BH(net, TCP_MIB_OUTSEGS);
}
static void tcp_v4_timewait_ack(struct sock *sk, struct sk_buff *skb)
@@ -698,9 +677,12 @@ static void tcp_v4_timewait_ack(struct sock *sk, struct sk_buff *skb)
struct inet_timewait_sock *tw = inet_twsk(sk);
struct tcp_timewait_sock *tcptw = tcp_twsk(sk);
- tcp_v4_send_ack(tcptw, skb, tcptw->tw_snd_nxt, tcptw->tw_rcv_nxt,
+ tcp_v4_send_ack(skb, tcptw->tw_snd_nxt, tcptw->tw_rcv_nxt,
tcptw->tw_rcv_wnd >> tw->tw_rcv_wscale,
- tcptw->tw_ts_recent);
+ tcptw->tw_ts_recent,
+ tw->tw_bound_dev_if,
+ tcp_twsk_md5_key(tcptw)
+ );
inet_twsk_put(tw);
}
@@ -708,9 +690,11 @@ static void tcp_v4_timewait_ack(struct sock *sk, struct sk_buff *skb)
static void tcp_v4_reqsk_send_ack(struct sk_buff *skb,
struct request_sock *req)
{
- tcp_v4_send_ack(NULL, skb, tcp_rsk(req)->snt_isn + 1,
+ tcp_v4_send_ack(skb, tcp_rsk(req)->snt_isn + 1,
tcp_rsk(req)->rcv_isn + 1, req->rcv_wnd,
- req->ts_recent);
+ req->ts_recent,
+ 0, /* oif: no bound device is passed for a request sock */
+ skb->sk ? tcp_v4_md5_do_lookup(skb->sk, ip_hdr(skb)->daddr) : NULL); /* an incoming skb may carry no ->sk; keep the guard the old tcp_v4_send_ack() had */
}
/*
@@ -1000,32 +984,13 @@ static int tcp_v4_parse_md5_keys(struct sock *sk, char __user *optval,
newkey, cmd.tcpm_keylen);
}
-static int tcp_v4_do_calc_md5_hash(char *md5_hash, struct tcp_md5sig_key *key,
- __be32 saddr, __be32 daddr,
- struct tcphdr *th, int protocol,
- unsigned int tcplen)
+static int tcp_v4_md5_hash_pseudoheader(struct tcp_md5sig_pool *hp,
+ __be32 daddr, __be32 saddr, int nbytes)
{
- struct scatterlist sg[4];
- __u16 data_len;
- int block = 0;
- __sum16 old_checksum;
- struct tcp_md5sig_pool *hp;
struct tcp4_pseudohdr *bp;
- struct hash_desc *desc;
- int err;
- unsigned int nbytes = 0;
-
- /*
- * Okay, so RFC2385 is turned on for this connection,
- * so we need to generate the MD5 hash for the packet now.
- */
-
- hp = tcp_get_md5sig_pool();
- if (!hp)
- goto clear_hash_noput;
+ struct scatterlist sg;
bp = &hp->md5_blk.ip4;
- desc = &hp->md5_desc;
/*
* 1. the TCP pseudo-header (in the order: source IP address,
@@ -1035,86 +1000,96 @@ static int tcp_v4_do_calc_md5_hash(char *md5_hash, struct tcp_md5sig_key *key,
bp->saddr = saddr;
bp->daddr = daddr;
bp->pad = 0;
- bp->protocol = protocol;
- bp->len = htons(tcplen);
-
- sg_init_table(sg, 4);
-
- sg_set_buf(&sg[block++], bp, sizeof(*bp));
- nbytes += sizeof(*bp);
-
- /* 2. the TCP header, excluding options, and assuming a
- * checksum of zero/
- */
- old_checksum = th->check;
- th->check = 0;
- sg_set_buf(&sg[block++], th, sizeof(struct tcphdr));
- nbytes += sizeof(struct tcphdr);
+ bp->protocol = IPPROTO_TCP;
+ bp->len = cpu_to_be16(nbytes);
- /* 3. the TCP segment data (if any) */
- data_len = tcplen - (th->doff << 2);
- if (data_len > 0) {
- unsigned char *data = (unsigned char *)th + (th->doff << 2);
- sg_set_buf(&sg[block++], data, data_len);
- nbytes += data_len;
- }
+ sg_init_one(&sg, bp, sizeof(*bp));
+ return crypto_hash_update(&hp->md5_desc, &sg, sizeof(*bp));
+}
- /* 4. an independently-specified key or password, known to both
- * TCPs and presumably connection-specific
- */
- sg_set_buf(&sg[block++], key->key, key->keylen);
- nbytes += key->keylen;
+static int tcp_v4_md5_hash_hdr(char *md5_hash, struct tcp_md5sig_key *key,
+ __be32 daddr, __be32 saddr, struct tcphdr *th)
+{
+ struct tcp_md5sig_pool *hp;
+ struct hash_desc *desc;
- sg_mark_end(&sg[block - 1]);
+ hp = tcp_get_md5sig_pool();
+ if (!hp)
+ goto clear_hash_noput;
+ desc = &hp->md5_desc;
- /* Now store the Hash into the packet */
- err = crypto_hash_init(desc);
- if (err)
+ if (crypto_hash_init(desc))
goto clear_hash;
- err = crypto_hash_update(desc, sg, nbytes);
- if (err)
+ if (tcp_v4_md5_hash_pseudoheader(hp, daddr, saddr, th->doff << 2)) /* header-only segment: the length fed to the pseudo-header is th->doff * 4 */
goto clear_hash;
- err = crypto_hash_final(desc, md5_hash);
- if (err)
+ if (tcp_md5_hash_header(hp, th))
+ goto clear_hash;
+ if (tcp_md5_hash_key(hp, key)) /* the key is hashed last, after pseudo-header and TCP header */
+ goto clear_hash;
+ if (crypto_hash_final(desc, md5_hash))
goto clear_hash;
- /* Reset header, and free up the crypto */
tcp_put_md5sig_pool();
- th->check = old_checksum;
-
-out:
return 0;
+
clear_hash:
tcp_put_md5sig_pool();
clear_hash_noput:
memset(md5_hash, 0, 16);
- goto out;
+ return 1; /* failure: the 16-byte digest was zeroed above and 1 is returned */
}
-int tcp_v4_calc_md5_hash(char *md5_hash, struct tcp_md5sig_key *key,
- struct sock *sk,
- struct dst_entry *dst,
- struct request_sock *req,
- struct tcphdr *th, int protocol,
- unsigned int tcplen)
+int tcp_v4_md5_hash_skb(char *md5_hash, struct tcp_md5sig_key *key,
+ struct sock *sk, struct request_sock *req,
+ struct sk_buff *skb)
{
+ struct tcp_md5sig_pool *hp;
+ struct hash_desc *desc;
+ struct tcphdr *th = tcp_hdr(skb);
__be32 saddr, daddr;
if (sk) {
saddr = inet_sk(sk)->saddr;
daddr = inet_sk(sk)->daddr;
+ } else if (req) { /* no socket given: use the request sock's local/remote addresses */
+ saddr = inet_rsk(req)->loc_addr;
+ daddr = inet_rsk(req)->rmt_addr;
} else {
- struct rtable *rt = (struct rtable *)dst;
- BUG_ON(!rt);
- saddr = rt->rt_src;
- daddr = rt->rt_dst;
+ const struct iphdr *iph = ip_hdr(skb); /* neither sk nor req (as in tcp_v4_inbound_md5_hash): use the packet's own IP header */
+ saddr = iph->saddr;
+ daddr = iph->daddr;
}
- return tcp_v4_do_calc_md5_hash(md5_hash, key,
- saddr, daddr,
- th, protocol, tcplen);
+
+ hp = tcp_get_md5sig_pool();
+ if (!hp)
+ goto clear_hash_noput;
+ desc = &hp->md5_desc;
+
+ if (crypto_hash_init(desc))
+ goto clear_hash;
+
+ if (tcp_v4_md5_hash_pseudoheader(hp, daddr, saddr, skb->len)) /* pseudo-header length is the whole skb->len */
+ goto clear_hash;
+ if (tcp_md5_hash_header(hp, th))
+ goto clear_hash;
+ if (tcp_md5_hash_skb_data(hp, skb, th->doff << 2)) /* third argument is the TCP header length in bytes; presumably the payload offset - confirm against the helper */
+ goto clear_hash;
+ if (tcp_md5_hash_key(hp, key))
+ goto clear_hash;
+ if (crypto_hash_final(desc, md5_hash))
+ goto clear_hash;
+
+ tcp_put_md5sig_pool();
+ return 0;
+
+clear_hash:
+ tcp_put_md5sig_pool();
+clear_hash_noput:
+ memset(md5_hash, 0, 16);
+ return 1; /* failure: digest zeroed, 1 returned */
}
-EXPORT_SYMBOL(tcp_v4_calc_md5_hash);
+EXPORT_SYMBOL(tcp_v4_md5_hash_skb);
static int tcp_v4_inbound_md5_hash(struct sock *sk, struct sk_buff *skb)
{
@@ -1130,52 +1105,12 @@ static int tcp_v4_inbound_md5_hash(struct sock *sk, struct sk_buff *skb)
struct tcp_md5sig_key *hash_expected;
const struct iphdr *iph = ip_hdr(skb);
struct tcphdr *th = tcp_hdr(skb);
- int length = (th->doff << 2) - sizeof(struct tcphdr);
int genhash;
- unsigned char *ptr;
unsigned char newhash[16];
hash_expected = tcp_v4_md5_do_lookup(sk, iph->saddr);
+ hash_location = tcp_parse_md5sig_option(th);
- /*
- * If the TCP option length is less than the TCP_MD5SIG
- * option length, then we can shortcut
- */
- if (length < TCPOLEN_MD5SIG) {
- if (hash_expected)
- return 1;
- else
- return 0;
- }
-
- /* Okay, we can't shortcut - we have to grub through the options */
- ptr = (unsigned char *)(th + 1);
- while (length > 0) {
- int opcode = *ptr++;
- int opsize;
-
- switch (opcode) {
- case TCPOPT_EOL:
- goto done_opts;
- case TCPOPT_NOP:
- length--;
- continue;
- default:
- opsize = *ptr++;
- if (opsize < 2)
- goto done_opts;
- if (opsize > length)
- goto done_opts;
-
- if (opcode == TCPOPT_MD5SIG) {
- hash_location = ptr;
- goto done_opts;
- }
- }
- ptr += opsize-2;
- length -= opsize;
- }
-done_opts:
/* We've parsed the options - do we have a hash? */
if (!hash_expected && !hash_location)
return 0;
@@ -1199,11 +1134,9 @@ done_opts:
/* Okay, so this is hash_expected and hash_location -
* so we need to calculate the checksum.
*/
- genhash = tcp_v4_do_calc_md5_hash(newhash,
- hash_expected,
- iph->saddr, iph->daddr,
- th, sk->sk_protocol,
- skb->len);
+ genhash = tcp_v4_md5_hash_skb(newhash,
+ hash_expected,
+ NULL, NULL, skb);
if (genhash || memcmp(hash_location, newhash, 16) != 0) {
if (net_ratelimit()) {
@@ -1347,7 +1280,7 @@ int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb)
if (get_seconds() < peer->tcp_ts_stamp + TCP_PAWS_MSL &&
(s32)(peer->tcp_ts - req->ts_recent) >
TCP_PAWS_WINDOW) {
- NET_INC_STATS_BH(LINUX_MIB_PAWSPASSIVEREJECTED);
+ NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_PAWSPASSIVEREJECTED);
goto drop_and_release;
}
}
@@ -1452,6 +1385,7 @@ struct sock *tcp_v4_syn_recv_sock(struct sock *sk, struct sk_buff *skb,
if (newkey != NULL)
tcp_v4_md5_do_add(newsk, inet_sk(sk)->daddr,
newkey, key->keylen);
+ newsk->sk_route_caps &= ~NETIF_F_GSO_MASK;
}
#endif
@@ -1461,9 +1395,9 @@ struct sock *tcp_v4_syn_recv_sock(struct sock *sk, struct sk_buff *skb,
return newsk;
exit_overflow:
- NET_INC_STATS_BH(LINUX_MIB_LISTENOVERFLOWS);
+ NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_LISTENOVERFLOWS);
exit:
- NET_INC_STATS_BH(LINUX_MIB_LISTENDROPS);
+ NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_LISTENDROPS);
dst_release(dst);
return NULL;
}
@@ -1590,7 +1524,7 @@ discard:
return 0;
csum_err:
- TCP_INC_STATS_BH(TCP_MIB_INERRS);
+ TCP_INC_STATS_BH(sock_net(sk), TCP_MIB_INERRS);
goto discard;
}
@@ -1604,12 +1538,13 @@ int tcp_v4_rcv(struct sk_buff *skb)
struct tcphdr *th;
struct sock *sk;
int ret;
+ struct net *net = dev_net(skb->dev);
if (skb->pkt_type != PACKET_HOST)
goto discard_it;
/* Count it even if it's bad */
- TCP_INC_STATS_BH(TCP_MIB_INSEGS);
+ TCP_INC_STATS_BH(net, TCP_MIB_INSEGS);
if (!pskb_may_pull(skb, sizeof(struct tcphdr)))
goto discard_it;
@@ -1638,7 +1573,7 @@ int tcp_v4_rcv(struct sk_buff *skb)
TCP_SKB_CB(skb)->flags = iph->tos;
TCP_SKB_CB(skb)->sacked = 0;
- sk = __inet_lookup(dev_net(skb->dev), &tcp_hashinfo, iph->saddr,
+ sk = __inet_lookup(net, &tcp_hashinfo, iph->saddr,
th->source, iph->daddr, th->dest, inet_iif(skb));
if (!sk)
goto no_tcp_socket;
@@ -1685,7 +1620,7 @@ no_tcp_socket:
if (skb->len < (th->doff << 2) || tcp_checksum_complete(skb)) {
bad_packet:
- TCP_INC_STATS_BH(TCP_MIB_INERRS);
+ TCP_INC_STATS_BH(net, TCP_MIB_INERRS);
} else {
tcp_v4_send_reset(NULL, skb);
}
@@ -1706,7 +1641,7 @@ do_time_wait:
}
if (skb->len < (th->doff << 2) || tcp_checksum_complete(skb)) {
- TCP_INC_STATS_BH(TCP_MIB_INERRS);
+ TCP_INC_STATS_BH(net, TCP_MIB_INERRS);
inet_twsk_put(inet_twsk(sk));
goto discard_it;
}
@@ -1814,7 +1749,7 @@ struct inet_connection_sock_af_ops ipv4_specific = {
#ifdef CONFIG_TCP_MD5SIG
static struct tcp_sock_af_ops tcp_sock_ipv4_specific = {
.md5_lookup = tcp_v4_md5_lookup,
- .calc_md5_hash = tcp_v4_calc_md5_hash,
+ .calc_md5_hash = tcp_v4_md5_hash_skb,
.md5_add = tcp_v4_md5_add_func,
.md5_parse = tcp_v4_parse_md5_keys,
};
@@ -1871,7 +1806,7 @@ static int tcp_v4_init_sock(struct sock *sk)
return 0;
}
-int tcp_v4_destroy_sock(struct sock *sk)
+void tcp_v4_destroy_sock(struct sock *sk)
{
struct tcp_sock *tp = tcp_sk(sk);
@@ -1915,8 +1850,6 @@ int tcp_v4_destroy_sock(struct sock *sk)
}
atomic_dec(&tcp_sockets_allocated);
-
- return 0;
}
EXPORT_SYMBOL(tcp_v4_destroy_sock);
@@ -1959,8 +1892,7 @@ static void *listening_get_next(struct seq_file *seq, void *cur)
req = req->dl_next;
while (1) {
while (req) {
- if (req->rsk_ops->family == st->family &&
- net_eq(sock_net(req->sk), net)) {
+ if (req->rsk_ops->family == st->family) {
cur = req;
goto out;
}
diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c
index 8245247a6ceb..204c42162660 100644
--- a/net/ipv4/tcp_minisocks.c
+++ b/net/ipv4/tcp_minisocks.c
@@ -5,8 +5,6 @@
*
* Implementation of the Transmission Control Protocol(TCP).
*
- * Version: $Id: tcp_minisocks.c,v 1.15 2002/02/01 22:01:04 davem Exp $
- *
* Authors: Ross Biro
* Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
* Mark Evans, <evansmp@uhura.aston.ac.uk>
@@ -246,7 +244,7 @@ kill:
}
if (paws_reject)
- NET_INC_STATS_BH(LINUX_MIB_PAWSESTABREJECTED);
+ NET_INC_STATS_BH(twsk_net(tw), LINUX_MIB_PAWSESTABREJECTED);
if (!th->rst) {
/* In this case we must reset the TIMEWAIT timer.
@@ -482,7 +480,7 @@ struct sock *tcp_create_openreq_child(struct sock *sk, struct request_sock *req,
newtp->rx_opt.mss_clamp = req->mss;
TCP_ECN_openreq_child(newtp, req);
- TCP_INC_STATS_BH(TCP_MIB_PASSIVEOPENS);
+ TCP_INC_STATS_BH(sock_net(sk), TCP_MIB_PASSIVEOPENS);
}
return newsk;
}
@@ -613,7 +611,7 @@ struct sock *tcp_check_req(struct sock *sk,struct sk_buff *skb,
if (!(flg & TCP_FLAG_RST))
req->rsk_ops->send_ack(skb, req);
if (paws_reject)
- NET_INC_STATS_BH(LINUX_MIB_PAWSESTABREJECTED);
+ NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_PAWSESTABREJECTED);
return NULL;
}
@@ -632,7 +630,7 @@ struct sock *tcp_check_req(struct sock *sk,struct sk_buff *skb,
* "fourth, check the SYN bit"
*/
if (flg & (TCP_FLAG_RST|TCP_FLAG_SYN)) {
- TCP_INC_STATS_BH(TCP_MIB_ATTEMPTFAILS);
+ TCP_INC_STATS_BH(sock_net(sk), TCP_MIB_ATTEMPTFAILS);
goto embryonic_reset;
}
@@ -697,7 +695,7 @@ struct sock *tcp_check_req(struct sock *sk,struct sk_buff *skb,
}
embryonic_reset:
- NET_INC_STATS_BH(LINUX_MIB_EMBRYONICRSTS);
+ NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_EMBRYONICRSTS);
if (!(flg & TCP_FLAG_RST))
req->rsk_ops->send_reset(sk, skb);
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index ad993ecb4810..1fa683c0ba9b 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -5,8 +5,6 @@
*
* Implementation of the Transmission Control Protocol(TCP).
*
- * Version: $Id: tcp_output.c,v 1.146 2002/02/01 22:01:04 davem Exp $
- *
* Authors: Ross Biro
* Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
* Mark Evans, <evansmp@uhura.aston.ac.uk>
@@ -347,28 +345,82 @@ static void tcp_init_nondata_skb(struct sk_buff *skb, u32 seq, u8 flags)
TCP_SKB_CB(skb)->end_seq = seq;
}
-static void tcp_build_and_update_options(__be32 *ptr, struct tcp_sock *tp,
- __u32 tstamp, __u8 **md5_hash)
-{
- if (tp->rx_opt.tstamp_ok) {
+#define OPTION_SACK_ADVERTISE (1 << 0) /* emit TCPOPT_SACK_PERM */
+#define OPTION_TS (1 << 1) /* emit TCPOPT_TIMESTAMP with tsval/tsecr */
+#define OPTION_MD5 (1 << 2) /* reserve room for a TCPOPT_MD5SIG digest */
+
+struct tcp_out_options {
+ u8 options; /* bit field of OPTION_* */
+ u8 ws; /* window scale, 0 to disable */
+ u8 num_sack_blocks; /* number of SACK blocks to include */
+ u16 mss; /* 0 to disable */
+ __u32 tsval, tsecr; /* only written out when OPTION_TS is set in options */
+};
+
+static void tcp_options_write(__be32 *ptr, struct tcp_sock *tp,
+ const struct tcp_out_options *opts,
+ __u8 **md5_hash) {
+ if (unlikely(OPTION_MD5 & opts->options)) {
+ *ptr++ = htonl((TCPOPT_NOP << 24) |
+ (TCPOPT_NOP << 16) |
+ (TCPOPT_MD5SIG << 8) |
+ TCPOLEN_MD5SIG);
+ *md5_hash = (__u8 *)ptr;
+ ptr += 4;
+ } else {
+ *md5_hash = NULL;
+ }
+
+ if (likely(OPTION_TS & opts->options)) {
+ if (unlikely(OPTION_SACK_ADVERTISE & opts->options)) {
+ *ptr++ = htonl((TCPOPT_SACK_PERM << 24) |
+ (TCPOLEN_SACK_PERM << 16) |
+ (TCPOPT_TIMESTAMP << 8) |
+ TCPOLEN_TIMESTAMP);
+ } else {
+ *ptr++ = htonl((TCPOPT_NOP << 24) |
+ (TCPOPT_NOP << 16) |
+ (TCPOPT_TIMESTAMP << 8) |
+ TCPOLEN_TIMESTAMP);
+ }
+ *ptr++ = htonl(opts->tsval);
+ *ptr++ = htonl(opts->tsecr);
+ }
+
+ if (unlikely(opts->mss)) {
+ *ptr++ = htonl((TCPOPT_MSS << 24) |
+ (TCPOLEN_MSS << 16) |
+ opts->mss);
+ }
+
+ if (unlikely(OPTION_SACK_ADVERTISE & opts->options &&
+ !(OPTION_TS & opts->options))) {
*ptr++ = htonl((TCPOPT_NOP << 24) |
(TCPOPT_NOP << 16) |
- (TCPOPT_TIMESTAMP << 8) |
- TCPOLEN_TIMESTAMP);
- *ptr++ = htonl(tstamp);
- *ptr++ = htonl(tp->rx_opt.ts_recent);
+ (TCPOPT_SACK_PERM << 8) |
+ TCPOLEN_SACK_PERM);
+ }
+
+ if (unlikely(opts->ws)) {
+ *ptr++ = htonl((TCPOPT_NOP << 24) |
+ (TCPOPT_WINDOW << 16) |
+ (TCPOLEN_WINDOW << 8) |
+ opts->ws);
}
- if (tp->rx_opt.eff_sacks) {
- struct tcp_sack_block *sp = tp->rx_opt.dsack ? tp->duplicate_sack : tp->selective_acks;
+
+ if (unlikely(opts->num_sack_blocks)) {
+ struct tcp_sack_block *sp = tp->rx_opt.dsack ?
+ tp->duplicate_sack : tp->selective_acks;
int this_sack;
*ptr++ = htonl((TCPOPT_NOP << 24) |
(TCPOPT_NOP << 16) |
(TCPOPT_SACK << 8) |
- (TCPOLEN_SACK_BASE + (tp->rx_opt.eff_sacks *
+ (TCPOLEN_SACK_BASE + (opts->num_sack_blocks *
TCPOLEN_SACK_PERBLOCK)));
- for (this_sack = 0; this_sack < tp->rx_opt.eff_sacks; this_sack++) {
+ for (this_sack = 0; this_sack < opts->num_sack_blocks;
+ ++this_sack) {
*ptr++ = htonl(sp[this_sack].start_seq);
*ptr++ = htonl(sp[this_sack].end_seq);
}
@@ -378,81 +430,137 @@ static void tcp_build_and_update_options(__be32 *ptr, struct tcp_sock *tp,
tp->rx_opt.eff_sacks--;
}
}
+}
+
+static unsigned tcp_syn_options(struct sock *sk, struct sk_buff *skb,
+ struct tcp_out_options *opts,
+ struct tcp_md5sig_key **md5) {
+ struct tcp_sock *tp = tcp_sk(sk);
+ unsigned size = 0; /* running total of option bytes; returned to the caller */
+
#ifdef CONFIG_TCP_MD5SIG
- if (md5_hash) {
- *ptr++ = htonl((TCPOPT_NOP << 24) |
- (TCPOPT_NOP << 16) |
- (TCPOPT_MD5SIG << 8) |
- TCPOLEN_MD5SIG);
- *md5_hash = (__u8 *)ptr;
+ *md5 = tp->af_specific->md5_lookup(sk, sk);
+ if (*md5) {
+ opts->options |= OPTION_MD5;
+ size += TCPOLEN_MD5SIG_ALIGNED;
}
+#else
+ *md5 = NULL;
#endif
+
+ /* We always get an MSS option. The option bytes which will be seen in
+ * normal data packets should timestamps be used, must be in the MSS
+ * advertised. But we subtract them from tp->mss_cache so that
+ * calculations in tcp_sendmsg are simpler etc. So account for this
+ * fact here if necessary. If we don't do this correctly, as a
+ * receiver we won't recognize data packets as being full sized when we
+ * should, and thus we won't abide by the delayed ACK rules correctly.
+ * SACKs don't matter, we never delay an ACK when we have any of those
+ * going out. */
+ opts->mss = tcp_advertise_mss(sk);
+ size += TCPOLEN_MSS_ALIGNED;
+
+ if (likely(sysctl_tcp_timestamps && *md5 == NULL)) { /* timestamps are not offered on a SYN that carries an MD5 signature */
+ opts->options |= OPTION_TS;
+ opts->tsval = TCP_SKB_CB(skb)->when;
+ opts->tsecr = tp->rx_opt.ts_recent;
+ size += TCPOLEN_TSTAMP_ALIGNED;
+ }
+ if (likely(sysctl_tcp_window_scaling)) {
+ opts->ws = tp->rx_opt.rcv_wscale;
+ size += TCPOLEN_WSCALE_ALIGNED; /* NOTE(review): counted even if rcv_wscale is 0, yet tcp_options_write() emits the option only for non-zero ws - confirm */
+ }
+ if (likely(sysctl_tcp_sack)) {
+ opts->options |= OPTION_SACK_ADVERTISE;
+ if (unlikely(!(OPTION_TS & opts->options))) /* parenthesised: "!OPTION_TS & x" is always 0 */
+ size += TCPOLEN_SACKPERM_ALIGNED; /* without TS, SACK_PERM is written as a word of its own */
+ }
+
+ return size;
}
-/* Construct a tcp options header for a SYN or SYN_ACK packet.
- * If this is every changed make sure to change the definition of
- * MAX_SYN_SIZE to match the new maximum number of options that you
- * can generate.
- *
- * Note - that with the RFC2385 TCP option, we make room for the
- * 16 byte MD5 hash. This will be filled in later, so the pointer for the
- * location to be filled is passed back up.
- */
-static void tcp_syn_build_options(__be32 *ptr, int mss, int ts, int sack,
- int offer_wscale, int wscale, __u32 tstamp,
- __u32 ts_recent, __u8 **md5_hash)
-{
- /* We always get an MSS option.
- * The option bytes which will be seen in normal data
- * packets should timestamps be used, must be in the MSS
- * advertised. But we subtract them from tp->mss_cache so
- * that calculations in tcp_sendmsg are simpler etc.
- * So account for this fact here if necessary. If we
- * don't do this correctly, as a receiver we won't
- * recognize data packets as being full sized when we
- * should, and thus we won't abide by the delayed ACK
- * rules correctly.
- * SACKs don't matter, we never delay an ACK when we
- * have any of those going out.
- */
- *ptr++ = htonl((TCPOPT_MSS << 24) | (TCPOLEN_MSS << 16) | mss);
- if (ts) {
- if (sack)
- *ptr++ = htonl((TCPOPT_SACK_PERM << 24) |
- (TCPOLEN_SACK_PERM << 16) |
- (TCPOPT_TIMESTAMP << 8) |
- TCPOLEN_TIMESTAMP);
- else
- *ptr++ = htonl((TCPOPT_NOP << 24) |
- (TCPOPT_NOP << 16) |
- (TCPOPT_TIMESTAMP << 8) |
- TCPOLEN_TIMESTAMP);
- *ptr++ = htonl(tstamp); /* TSVAL */
- *ptr++ = htonl(ts_recent); /* TSECR */
- } else if (sack)
- *ptr++ = htonl((TCPOPT_NOP << 24) |
- (TCPOPT_NOP << 16) |
- (TCPOPT_SACK_PERM << 8) |
- TCPOLEN_SACK_PERM);
- if (offer_wscale)
- *ptr++ = htonl((TCPOPT_NOP << 24) |
- (TCPOPT_WINDOW << 16) |
- (TCPOLEN_WINDOW << 8) |
- (wscale));
+static unsigned tcp_synack_options(struct sock *sk,
+ struct request_sock *req,
+ unsigned mss, struct sk_buff *skb,
+ struct tcp_out_options *opts,
+ struct tcp_md5sig_key **md5) {
+ unsigned size = 0; /* running total of option bytes; returned to the caller */
+ struct inet_request_sock *ireq = inet_rsk(req);
+ char doing_ts;
+
#ifdef CONFIG_TCP_MD5SIG
- /*
- * If MD5 is enabled, then we set the option, and include the size
- * (always 18). The actual MD5 hash is added just before the
- * packet is sent.
- */
- if (md5_hash) {
- *ptr++ = htonl((TCPOPT_NOP << 24) |
- (TCPOPT_NOP << 16) |
- (TCPOPT_MD5SIG << 8) |
- TCPOLEN_MD5SIG);
- *md5_hash = (__u8 *)ptr;
+ *md5 = tcp_rsk(req)->af_specific->md5_lookup(sk, req);
+ if (*md5) {
+ opts->options |= OPTION_MD5;
+ size += TCPOLEN_MD5SIG_ALIGNED;
}
+#else
+ *md5 = NULL;
#endif
+
+ /* We can't fit any SACK blocks in a packet with MD5 + TS options, so
+ timestamps are dropped when both an MD5 key and SACK are in use. There
+ was discussion about disabling SACK rather than TS in order to fit in
+ better with old, buggy kernels, but that was deemed to be unnecessary. */
+ doing_ts = ireq->tstamp_ok && !(*md5 && ireq->sack_ok);
+
+ opts->mss = mss;
+ size += TCPOLEN_MSS_ALIGNED;
+
+ if (likely(ireq->wscale_ok)) {
+ opts->ws = ireq->rcv_wscale;
+ size += TCPOLEN_WSCALE_ALIGNED;
+ }
+ if (likely(doing_ts)) {
+ opts->options |= OPTION_TS;
+ opts->tsval = TCP_SKB_CB(skb)->when;
+ opts->tsecr = req->ts_recent;
+ size += TCPOLEN_TSTAMP_ALIGNED;
+ }
+ if (likely(ireq->sack_ok)) {
+ opts->options |= OPTION_SACK_ADVERTISE;
+ if (unlikely(!doing_ts)) /* without TS, SACK_PERM is written as a word of its own */
+ size += TCPOLEN_SACKPERM_ALIGNED;
+ }
+
+ return size;
+}
+
+static unsigned tcp_established_options(struct sock *sk, struct sk_buff *skb,
+ struct tcp_out_options *opts,
+ struct tcp_md5sig_key **md5) {
+ struct tcp_skb_cb *tcb = skb ? TCP_SKB_CB(skb) : NULL; /* skb may be NULL: tcp_current_mss() calls this only for the size */
+ struct tcp_sock *tp = tcp_sk(sk);
+ unsigned size = 0; /* running total of option bytes; returned to the caller */
+
+#ifdef CONFIG_TCP_MD5SIG
+ *md5 = tp->af_specific->md5_lookup(sk, sk);
+ if (unlikely(*md5)) {
+ opts->options |= OPTION_MD5; /* NOTE(review): ORs into *opts, so callers should zero it first; tcp_current_mss() does not appear to - confirm */
+ size += TCPOLEN_MD5SIG_ALIGNED;
+ }
+#else
+ *md5 = NULL;
+#endif
+
+ if (likely(tp->rx_opt.tstamp_ok)) {
+ opts->options |= OPTION_TS;
+ opts->tsval = tcb ? tcb->when : 0; /* 0 when sizing without an skb */
+ opts->tsecr = tp->rx_opt.ts_recent;
+ size += TCPOLEN_TSTAMP_ALIGNED;
+ }
+
+ if (unlikely(tp->rx_opt.eff_sacks)) {
+ const unsigned remaining = MAX_TCP_OPTION_SPACE - size; /* option bytes left after MD5/TS */
+ opts->num_sack_blocks =
+ min_t(unsigned, tp->rx_opt.eff_sacks,
+ (remaining - TCPOLEN_SACK_BASE_ALIGNED) /
+ TCPOLEN_SACK_PERBLOCK);
+ size += TCPOLEN_SACK_BASE_ALIGNED + /* NOTE(review): base length is added even if min_t() yields 0 blocks - verify that cannot happen */
+ opts->num_sack_blocks * TCPOLEN_SACK_PERBLOCK;
+ }
+
+ return size;
}
/* This routine actually transmits TCP packets queued in by
@@ -473,13 +581,11 @@ static int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, int clone_it,
struct inet_sock *inet;
struct tcp_sock *tp;
struct tcp_skb_cb *tcb;
- int tcp_header_size;
-#ifdef CONFIG_TCP_MD5SIG
+ struct tcp_out_options opts;
+ unsigned tcp_options_size, tcp_header_size;
struct tcp_md5sig_key *md5;
__u8 *md5_hash_location;
-#endif
struct tcphdr *th;
- int sysctl_flags;
int err;
BUG_ON(!skb || !tcp_skb_pcount(skb));
@@ -502,50 +608,18 @@ static int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, int clone_it,
inet = inet_sk(sk);
tp = tcp_sk(sk);
tcb = TCP_SKB_CB(skb);
- tcp_header_size = tp->tcp_header_len;
-
-#define SYSCTL_FLAG_TSTAMPS 0x1
-#define SYSCTL_FLAG_WSCALE 0x2
-#define SYSCTL_FLAG_SACK 0x4
+ memset(&opts, 0, sizeof(opts));
- sysctl_flags = 0;
- if (unlikely(tcb->flags & TCPCB_FLAG_SYN)) {
- tcp_header_size = sizeof(struct tcphdr) + TCPOLEN_MSS;
- if (sysctl_tcp_timestamps) {
- tcp_header_size += TCPOLEN_TSTAMP_ALIGNED;
- sysctl_flags |= SYSCTL_FLAG_TSTAMPS;
- }
- if (sysctl_tcp_window_scaling) {
- tcp_header_size += TCPOLEN_WSCALE_ALIGNED;
- sysctl_flags |= SYSCTL_FLAG_WSCALE;
- }
- if (sysctl_tcp_sack) {
- sysctl_flags |= SYSCTL_FLAG_SACK;
- if (!(sysctl_flags & SYSCTL_FLAG_TSTAMPS))
- tcp_header_size += TCPOLEN_SACKPERM_ALIGNED;
- }
- } else if (unlikely(tp->rx_opt.eff_sacks)) {
- /* A SACK is 2 pad bytes, a 2 byte header, plus
- * 2 32-bit sequence numbers for each SACK block.
- */
- tcp_header_size += (TCPOLEN_SACK_BASE_ALIGNED +
- (tp->rx_opt.eff_sacks *
- TCPOLEN_SACK_PERBLOCK));
- }
+ if (unlikely(tcb->flags & TCPCB_FLAG_SYN))
+ tcp_options_size = tcp_syn_options(sk, skb, &opts, &md5);
+ else
+ tcp_options_size = tcp_established_options(sk, skb, &opts,
+ &md5);
+ tcp_header_size = tcp_options_size + sizeof(struct tcphdr);
if (tcp_packets_in_flight(tp) == 0)
tcp_ca_event(sk, CA_EVENT_TX_START);
-#ifdef CONFIG_TCP_MD5SIG
- /*
- * Are we doing MD5 on this segment? If so - make
- * room for it.
- */
- md5 = tp->af_specific->md5_lookup(sk, sk);
- if (md5)
- tcp_header_size += TCPOLEN_MD5SIG_ALIGNED;
-#endif
-
skb_push(skb, tcp_header_size);
skb_reset_transport_header(skb);
skb_set_owner_w(skb, sk);
@@ -576,39 +650,16 @@ static int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, int clone_it,
th->urg = 1;
}
- if (unlikely(tcb->flags & TCPCB_FLAG_SYN)) {
- tcp_syn_build_options((__be32 *)(th + 1),
- tcp_advertise_mss(sk),
- (sysctl_flags & SYSCTL_FLAG_TSTAMPS),
- (sysctl_flags & SYSCTL_FLAG_SACK),
- (sysctl_flags & SYSCTL_FLAG_WSCALE),
- tp->rx_opt.rcv_wscale,
- tcb->when,
- tp->rx_opt.ts_recent,
-
-#ifdef CONFIG_TCP_MD5SIG
- md5 ? &md5_hash_location :
-#endif
- NULL);
- } else {
- tcp_build_and_update_options((__be32 *)(th + 1),
- tp, tcb->when,
-#ifdef CONFIG_TCP_MD5SIG
- md5 ? &md5_hash_location :
-#endif
- NULL);
+ tcp_options_write((__be32 *)(th + 1), tp, &opts, &md5_hash_location);
+ if (likely((tcb->flags & TCPCB_FLAG_SYN) == 0))
TCP_ECN_send(sk, skb, tcp_header_size);
- }
#ifdef CONFIG_TCP_MD5SIG
/* Calculate the MD5 hash, as we have all we need now */
if (md5) {
+ sk->sk_route_caps &= ~NETIF_F_GSO_MASK;
tp->af_specific->calc_md5_hash(md5_hash_location,
- md5,
- sk, NULL, NULL,
- tcp_hdr(skb),
- sk->sk_protocol,
- skb->len);
+ md5, sk, NULL, skb);
}
#endif
@@ -621,7 +672,7 @@ static int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, int clone_it,
tcp_event_data_sent(tp, skb, sk);
if (after(tcb->end_seq, tp->snd_nxt) || tcb->seq == tcb->end_seq)
- TCP_INC_STATS(TCP_MIB_OUTSEGS);
+ TCP_INC_STATS(sock_net(sk), TCP_MIB_OUTSEGS);
err = icsk->icsk_af_ops->queue_xmit(skb, 0);
if (likely(err <= 0))
@@ -630,10 +681,6 @@ static int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, int clone_it,
tcp_enter_cwr(sk, 1);
return net_xmit_eval(err);
-
-#undef SYSCTL_FLAG_TSTAMPS
-#undef SYSCTL_FLAG_WSCALE
-#undef SYSCTL_FLAG_SACK
}
/* This routine just queue's the buffer
@@ -974,6 +1021,9 @@ unsigned int tcp_current_mss(struct sock *sk, int large_allowed)
u32 mss_now;
u16 xmit_size_goal;
int doing_tso = 0;
+ unsigned header_len;
+ struct tcp_out_options opts;
+ struct tcp_md5sig_key *md5;
mss_now = tp->mss_cache;
@@ -986,14 +1036,16 @@ unsigned int tcp_current_mss(struct sock *sk, int large_allowed)
mss_now = tcp_sync_mss(sk, mtu);
}
- if (tp->rx_opt.eff_sacks)
- mss_now -= (TCPOLEN_SACK_BASE_ALIGNED +
- (tp->rx_opt.eff_sacks * TCPOLEN_SACK_PERBLOCK));
-
-#ifdef CONFIG_TCP_MD5SIG
- if (tp->af_specific->md5_lookup(sk, sk))
- mss_now -= TCPOLEN_MD5SIG_ALIGNED;
-#endif
+ header_len = tcp_established_options(sk, NULL, &opts, &md5) +
+ sizeof(struct tcphdr);
+ /* The mss_cache is sized based on tp->tcp_header_len, which assumes
+ * some common options. If this is an odd packet (because we have SACK
+ * blocks etc) then our calculated header_len will be different, and
+ * we have to adjust mss_now correspondingly */
+ if (header_len != tp->tcp_header_len) {
+ int delta = (int) header_len - tp->tcp_header_len;
+ mss_now -= delta;
+ }
xmit_size_goal = mss_now;
@@ -1913,7 +1965,7 @@ int tcp_retransmit_skb(struct sock *sk, struct sk_buff *skb)
if (err == 0) {
/* Update global TCP statistics. */
- TCP_INC_STATS(TCP_MIB_RETRANSSEGS);
+ TCP_INC_STATS(sock_net(sk), TCP_MIB_RETRANSSEGS);
tp->total_retrans++;
@@ -1988,14 +2040,17 @@ void tcp_xmit_retransmit_queue(struct sock *sk)
if (sacked & TCPCB_LOST) {
if (!(sacked & (TCPCB_SACKED_ACKED|TCPCB_SACKED_RETRANS))) {
+ int mib_idx;
+
if (tcp_retransmit_skb(sk, skb)) {
tp->retransmit_skb_hint = NULL;
return;
}
if (icsk->icsk_ca_state != TCP_CA_Loss)
- NET_INC_STATS_BH(LINUX_MIB_TCPFASTRETRANS);
+ mib_idx = LINUX_MIB_TCPFASTRETRANS;
else
- NET_INC_STATS_BH(LINUX_MIB_TCPSLOWSTARTRETRANS);
+ mib_idx = LINUX_MIB_TCPSLOWSTARTRETRANS;
+ NET_INC_STATS_BH(sock_net(sk), mib_idx);
if (skb == tcp_write_queue_head(sk))
inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS,
@@ -2065,7 +2120,7 @@ void tcp_xmit_retransmit_queue(struct sock *sk)
inet_csk(sk)->icsk_rto,
TCP_RTO_MAX);
- NET_INC_STATS_BH(LINUX_MIB_TCPFORWARDRETRANS);
+ NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPFORWARDRETRANS);
}
}
@@ -2119,7 +2174,7 @@ void tcp_send_active_reset(struct sock *sk, gfp_t priority)
/* NOTE: No TCP options attached and we never retransmit this. */
skb = alloc_skb(MAX_TCP_HEADER, priority);
if (!skb) {
- NET_INC_STATS(LINUX_MIB_TCPABORTFAILED);
+ NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPABORTFAILED);
return;
}
@@ -2130,9 +2185,9 @@ void tcp_send_active_reset(struct sock *sk, gfp_t priority)
/* Send it off. */
TCP_SKB_CB(skb)->when = tcp_time_stamp;
if (tcp_transmit_skb(sk, skb, 0, priority))
- NET_INC_STATS(LINUX_MIB_TCPABORTFAILED);
+ NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPABORTFAILED);
- TCP_INC_STATS(TCP_MIB_OUTRSTS);
+ TCP_INC_STATS(sock_net(sk), TCP_MIB_OUTRSTS);
}
/* WARNING: This routine must only be called when we have already sent
@@ -2180,11 +2235,10 @@ struct sk_buff *tcp_make_synack(struct sock *sk, struct dst_entry *dst,
struct tcp_sock *tp = tcp_sk(sk);
struct tcphdr *th;
int tcp_header_size;
+ struct tcp_out_options opts;
struct sk_buff *skb;
-#ifdef CONFIG_TCP_MD5SIG
struct tcp_md5sig_key *md5;
__u8 *md5_hash_location;
-#endif
skb = sock_wmalloc(sk, MAX_TCP_HEADER + 15, 1, GFP_ATOMIC);
if (skb == NULL)
@@ -2195,18 +2249,27 @@ struct sk_buff *tcp_make_synack(struct sock *sk, struct dst_entry *dst,
skb->dst = dst_clone(dst);
- tcp_header_size = (sizeof(struct tcphdr) + TCPOLEN_MSS +
- (ireq->tstamp_ok ? TCPOLEN_TSTAMP_ALIGNED : 0) +
- (ireq->wscale_ok ? TCPOLEN_WSCALE_ALIGNED : 0) +
- /* SACK_PERM is in the place of NOP NOP of TS */
- ((ireq->sack_ok && !ireq->tstamp_ok) ? TCPOLEN_SACKPERM_ALIGNED : 0));
+ if (req->rcv_wnd == 0) { /* ignored for retransmitted syns */
+ __u8 rcv_wscale;
+ /* Set this up on the first call only */
+ req->window_clamp = tp->window_clamp ? : dst_metric(dst, RTAX_WINDOW);
+ /* tcp_full_space because it is guaranteed to be the first packet */
+ tcp_select_initial_window(tcp_full_space(sk),
+ dst_metric(dst, RTAX_ADVMSS) - (ireq->tstamp_ok ? TCPOLEN_TSTAMP_ALIGNED : 0),
+ &req->rcv_wnd,
+ &req->window_clamp,
+ ireq->wscale_ok,
+ &rcv_wscale);
+ ireq->rcv_wscale = rcv_wscale;
+ }
+
+ memset(&opts, 0, sizeof(opts));
+ TCP_SKB_CB(skb)->when = tcp_time_stamp;
+ tcp_header_size = tcp_synack_options(sk, req,
+ dst_metric(dst, RTAX_ADVMSS),
+ skb, &opts, &md5) +
+ sizeof(struct tcphdr);
-#ifdef CONFIG_TCP_MD5SIG
- /* Are we doing MD5 on this segment? If so - make room for it */
- md5 = tcp_rsk(req)->af_specific->md5_lookup(sk, req);
- if (md5)
- tcp_header_size += TCPOLEN_MD5SIG_ALIGNED;
-#endif
skb_push(skb, tcp_header_size);
skb_reset_transport_header(skb);
@@ -2224,19 +2287,6 @@ struct sk_buff *tcp_make_synack(struct sock *sk, struct dst_entry *dst,
TCPCB_FLAG_SYN | TCPCB_FLAG_ACK);
th->seq = htonl(TCP_SKB_CB(skb)->seq);
th->ack_seq = htonl(tcp_rsk(req)->rcv_isn + 1);
- if (req->rcv_wnd == 0) { /* ignored for retransmitted syns */
- __u8 rcv_wscale;
- /* Set this up on the first call only */
- req->window_clamp = tp->window_clamp ? : dst_metric(dst, RTAX_WINDOW);
- /* tcp_full_space because it is guaranteed to be the first packet */
- tcp_select_initial_window(tcp_full_space(sk),
- dst_metric(dst, RTAX_ADVMSS) - (ireq->tstamp_ok ? TCPOLEN_TSTAMP_ALIGNED : 0),
- &req->rcv_wnd,
- &req->window_clamp,
- ireq->wscale_ok,
- &rcv_wscale);
- ireq->rcv_wscale = rcv_wscale;
- }
/* RFC1323: The window in SYN & SYN/ACK segments is never scaled. */
th->window = htons(min(req->rcv_wnd, 65535U));
@@ -2245,29 +2295,15 @@ struct sk_buff *tcp_make_synack(struct sock *sk, struct dst_entry *dst,
TCP_SKB_CB(skb)->when = cookie_init_timestamp(req);
else
#endif
- TCP_SKB_CB(skb)->when = tcp_time_stamp;
- tcp_syn_build_options((__be32 *)(th + 1), dst_metric(dst, RTAX_ADVMSS), ireq->tstamp_ok,
- ireq->sack_ok, ireq->wscale_ok, ireq->rcv_wscale,
- TCP_SKB_CB(skb)->when,
- req->ts_recent,
- (
-#ifdef CONFIG_TCP_MD5SIG
- md5 ? &md5_hash_location :
-#endif
- NULL)
- );
-
+ tcp_options_write((__be32 *)(th + 1), tp, &opts, &md5_hash_location);
th->doff = (tcp_header_size >> 2);
- TCP_INC_STATS(TCP_MIB_OUTSEGS);
+ TCP_INC_STATS(sock_net(sk), TCP_MIB_OUTSEGS);
#ifdef CONFIG_TCP_MD5SIG
/* Okay, we have all we need - do the md5 hash if needed */
if (md5) {
tp->af_specific->calc_md5_hash(md5_hash_location,
- md5,
- NULL, dst, req,
- tcp_hdr(skb), sk->sk_protocol,
- skb->len);
+ md5, NULL, req, skb);
}
#endif
@@ -2367,7 +2403,7 @@ int tcp_connect(struct sock *sk)
*/
tp->snd_nxt = tp->write_seq;
tp->pushed_seq = tp->write_seq;
- TCP_INC_STATS(TCP_MIB_ACTIVEOPENS);
+ TCP_INC_STATS(sock_net(sk), TCP_MIB_ACTIVEOPENS);
/* Timer for repeating the SYN until an answer. */
inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS,
diff --git a/net/ipv4/tcp_probe.c b/net/ipv4/tcp_probe.c
index 5ff0ce6e9d39..7ddc30f0744f 100644
--- a/net/ipv4/tcp_probe.c
+++ b/net/ipv4/tcp_probe.c
@@ -224,7 +224,7 @@ static __init int tcpprobe_init(void)
if (bufsize < 0)
return -EINVAL;
- tcp_probe.log = kcalloc(sizeof(struct tcp_log), bufsize, GFP_KERNEL);
+ tcp_probe.log = kcalloc(bufsize, sizeof(struct tcp_log), GFP_KERNEL);
if (!tcp_probe.log)
goto err0;
diff --git a/net/ipv4/tcp_timer.c b/net/ipv4/tcp_timer.c
index 63ed9d6830e7..328e0cf42b3c 100644
--- a/net/ipv4/tcp_timer.c
+++ b/net/ipv4/tcp_timer.c
@@ -5,8 +5,6 @@
*
* Implementation of the Transmission Control Protocol(TCP).
*
- * Version: $Id: tcp_timer.c,v 1.88 2002/02/01 22:01:04 davem Exp $
- *
* Authors: Ross Biro
* Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
* Mark Evans, <evansmp@uhura.aston.ac.uk>
@@ -50,7 +48,7 @@ static void tcp_write_err(struct sock *sk)
sk->sk_error_report(sk);
tcp_done(sk);
- NET_INC_STATS_BH(LINUX_MIB_TCPABORTONTIMEOUT);
+ NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPABORTONTIMEOUT);
}
/* Do not allow orphaned sockets to eat all our resources.
@@ -91,7 +89,7 @@ static int tcp_out_of_resources(struct sock *sk, int do_reset)
if (do_reset)
tcp_send_active_reset(sk, GFP_ATOMIC);
tcp_done(sk);
- NET_INC_STATS_BH(LINUX_MIB_TCPABORTONMEMORY);
+ NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPABORTONMEMORY);
return 1;
}
return 0;
@@ -181,7 +179,7 @@ static void tcp_delack_timer(unsigned long data)
if (sock_owned_by_user(sk)) {
/* Try again later. */
icsk->icsk_ack.blocked = 1;
- NET_INC_STATS_BH(LINUX_MIB_DELAYEDACKLOCKED);
+ NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_DELAYEDACKLOCKED);
sk_reset_timer(sk, &icsk->icsk_delack_timer, jiffies + TCP_DELACK_MIN);
goto out_unlock;
}
@@ -200,7 +198,7 @@ static void tcp_delack_timer(unsigned long data)
if (!skb_queue_empty(&tp->ucopy.prequeue)) {
struct sk_buff *skb;
- NET_INC_STATS_BH(LINUX_MIB_TCPSCHEDULERFAILED);
+ NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPSCHEDULERFAILED);
while ((skb = __skb_dequeue(&tp->ucopy.prequeue)) != NULL)
sk->sk_backlog_rcv(sk, skb);
@@ -220,7 +218,7 @@ static void tcp_delack_timer(unsigned long data)
icsk->icsk_ack.ato = TCP_ATO_MIN;
}
tcp_send_ack(sk);
- NET_INC_STATS_BH(LINUX_MIB_DELAYEDACKS);
+ NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_DELAYEDACKS);
}
TCP_CHECK_TIMER(sk);
@@ -328,24 +326,27 @@ static void tcp_retransmit_timer(struct sock *sk)
goto out;
if (icsk->icsk_retransmits == 0) {
+ int mib_idx;
+
if (icsk->icsk_ca_state == TCP_CA_Disorder ||
icsk->icsk_ca_state == TCP_CA_Recovery) {
if (tcp_is_sack(tp)) {
if (icsk->icsk_ca_state == TCP_CA_Recovery)
- NET_INC_STATS_BH(LINUX_MIB_TCPSACKRECOVERYFAIL);
+ mib_idx = LINUX_MIB_TCPSACKRECOVERYFAIL;
else
- NET_INC_STATS_BH(LINUX_MIB_TCPSACKFAILURES);
+ mib_idx = LINUX_MIB_TCPSACKFAILURES;
} else {
if (icsk->icsk_ca_state == TCP_CA_Recovery)
- NET_INC_STATS_BH(LINUX_MIB_TCPRENORECOVERYFAIL);
+ mib_idx = LINUX_MIB_TCPRENORECOVERYFAIL;
else
- NET_INC_STATS_BH(LINUX_MIB_TCPRENOFAILURES);
+ mib_idx = LINUX_MIB_TCPRENOFAILURES;
}
} else if (icsk->icsk_ca_state == TCP_CA_Loss) {
- NET_INC_STATS_BH(LINUX_MIB_TCPLOSSFAILURES);
+ mib_idx = LINUX_MIB_TCPLOSSFAILURES;
} else {
- NET_INC_STATS_BH(LINUX_MIB_TCPTIMEOUTS);
+ mib_idx = LINUX_MIB_TCPTIMEOUTS;
}
+ NET_INC_STATS_BH(sock_net(sk), mib_idx);
}
if (tcp_use_frto(sk)) {
diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
index 56fcda3694ba..a751770947a3 100644
--- a/net/ipv4/udp.c
+++ b/net/ipv4/udp.c
@@ -5,8 +5,6 @@
*
* The User Datagram Protocol (UDP).
*
- * Version: $Id: udp.c,v 1.102 2002/02/01 22:01:04 davem Exp $
- *
* Authors: Ross Biro
* Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
* Arnt Gulbrandsen, <agulbra@nvg.unit.no>
@@ -110,9 +108,6 @@
* Snmp MIB for the UDP layer
*/
-DEFINE_SNMP_STAT(struct udp_mib, udp_statistics) __read_mostly;
-EXPORT_SYMBOL(udp_statistics);
-
DEFINE_SNMP_STAT(struct udp_mib, udp_stats_in6) __read_mostly;
EXPORT_SYMBOL(udp_stats_in6);
@@ -136,7 +131,7 @@ static inline int __udp_lib_lport_inuse(struct net *net, __u16 num,
struct sock *sk;
struct hlist_node *node;
- sk_for_each(sk, node, &udptable[num & (UDP_HTABLE_SIZE - 1)])
+ sk_for_each(sk, node, &udptable[udp_hashfn(net, num)])
if (net_eq(sock_net(sk), net) && sk->sk_hash == num)
return 1;
return 0;
@@ -176,7 +171,7 @@ int udp_lib_get_port(struct sock *sk, unsigned short snum,
for (i = 0; i < UDP_HTABLE_SIZE; i++) {
int size = 0;
- head = &udptable[rover & (UDP_HTABLE_SIZE - 1)];
+ head = &udptable[udp_hashfn(net, rover)];
if (hlist_empty(head))
goto gotit;
@@ -213,7 +208,7 @@ int udp_lib_get_port(struct sock *sk, unsigned short snum,
gotit:
snum = rover;
} else {
- head = &udptable[snum & (UDP_HTABLE_SIZE - 1)];
+ head = &udptable[udp_hashfn(net, snum)];
sk_for_each(sk2, node, head)
if (sk2->sk_hash == snum &&
@@ -229,7 +224,7 @@ gotit:
inet_sk(sk)->num = snum;
sk->sk_hash = snum;
if (sk_unhashed(sk)) {
- head = &udptable[snum & (UDP_HTABLE_SIZE - 1)];
+ head = &udptable[udp_hashfn(net, snum)];
sk_add_node(sk, head);
sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1);
}
@@ -266,7 +261,7 @@ static struct sock *__udp4_lib_lookup(struct net *net, __be32 saddr,
int badness = -1;
read_lock(&udp_hash_lock);
- sk_for_each(sk, node, &udptable[hnum & (UDP_HTABLE_SIZE - 1)]) {
+ sk_for_each(sk, node, &udptable[udp_hashfn(net, hnum)]) {
struct inet_sock *inet = inet_sk(sk);
if (net_eq(sock_net(sk), net) && sk->sk_hash == hnum &&
@@ -356,11 +351,12 @@ void __udp4_lib_err(struct sk_buff *skb, u32 info, struct hlist_head udptable[])
struct sock *sk;
int harderr;
int err;
+ struct net *net = dev_net(skb->dev);
- sk = __udp4_lib_lookup(dev_net(skb->dev), iph->daddr, uh->dest,
+ sk = __udp4_lib_lookup(net, iph->daddr, uh->dest,
iph->saddr, uh->source, skb->dev->ifindex, udptable);
if (sk == NULL) {
- ICMP_INC_STATS_BH(ICMP_MIB_INERRORS);
+ ICMP_INC_STATS_BH(net, ICMP_MIB_INERRORS);
return; /* No socket for error */
}
@@ -528,7 +524,8 @@ out:
up->len = 0;
up->pending = 0;
if (!err)
- UDP_INC_STATS_USER(UDP_MIB_OUTDATAGRAMS, is_udplite);
+ UDP_INC_STATS_USER(sock_net(sk),
+ UDP_MIB_OUTDATAGRAMS, is_udplite);
return err;
}
@@ -656,11 +653,13 @@ int udp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
.uli_u = { .ports =
{ .sport = inet->sport,
.dport = dport } } };
+ struct net *net = sock_net(sk);
+
security_sk_classify_flow(sk, &fl);
- err = ip_route_output_flow(sock_net(sk), &rt, &fl, sk, 1);
+ err = ip_route_output_flow(net, &rt, &fl, sk, 1);
if (err) {
if (err == -ENETUNREACH)
- IP_INC_STATS_BH(IPSTATS_MIB_OUTNOROUTES);
+ IP_INC_STATS_BH(net, IPSTATS_MIB_OUTNOROUTES);
goto out;
}
@@ -727,7 +726,8 @@ out:
* seems like overkill.
*/
if (err == -ENOBUFS || test_bit(SOCK_NOSPACE, &sk->sk_socket->flags)) {
- UDP_INC_STATS_USER(UDP_MIB_SNDBUFERRORS, is_udplite);
+ UDP_INC_STATS_USER(sock_net(sk),
+ UDP_MIB_SNDBUFERRORS, is_udplite);
}
return err;
@@ -890,7 +890,8 @@ try_again:
goto out_free;
if (!peeked)
- UDP_INC_STATS_USER(UDP_MIB_INDATAGRAMS, is_udplite);
+ UDP_INC_STATS_USER(sock_net(sk),
+ UDP_MIB_INDATAGRAMS, is_udplite);
sock_recv_timestamp(msg, sk, skb);
@@ -919,7 +920,7 @@ out:
csum_copy_err:
lock_sock(sk);
if (!skb_kill_datagram(sk, skb, flags))
- UDP_INC_STATS_USER(UDP_MIB_INERRORS, is_udplite);
+ UDP_INC_STATS_USER(sock_net(sk), UDP_MIB_INERRORS, is_udplite);
release_sock(sk);
if (noblock)
@@ -990,7 +991,8 @@ int udp_queue_rcv_skb(struct sock * sk, struct sk_buff *skb)
ret = (*up->encap_rcv)(sk, skb);
if (ret <= 0) {
- UDP_INC_STATS_BH(UDP_MIB_INDATAGRAMS,
+ UDP_INC_STATS_BH(sock_net(sk),
+ UDP_MIB_INDATAGRAMS,
is_udplite);
return -ret;
}
@@ -1042,15 +1044,18 @@ int udp_queue_rcv_skb(struct sock * sk, struct sk_buff *skb)
if ((rc = sock_queue_rcv_skb(sk,skb)) < 0) {
/* Note that an ENOMEM error is charged twice */
- if (rc == -ENOMEM)
- UDP_INC_STATS_BH(UDP_MIB_RCVBUFERRORS, is_udplite);
+ if (rc == -ENOMEM) {
+ UDP_INC_STATS_BH(sock_net(sk),
+ UDP_MIB_RCVBUFERRORS, is_udplite);
+ atomic_inc(&sk->sk_drops);
+ }
goto drop;
}
return 0;
drop:
- UDP_INC_STATS_BH(UDP_MIB_INERRORS, is_udplite);
+ UDP_INC_STATS_BH(sock_net(sk), UDP_MIB_INERRORS, is_udplite);
kfree_skb(skb);
return -1;
}
@@ -1061,7 +1066,7 @@ drop:
* Note: called only from the BH handler context,
* so we don't need to lock the hashes.
*/
-static int __udp4_lib_mcast_deliver(struct sk_buff *skb,
+static int __udp4_lib_mcast_deliver(struct net *net, struct sk_buff *skb,
struct udphdr *uh,
__be32 saddr, __be32 daddr,
struct hlist_head udptable[])
@@ -1070,7 +1075,7 @@ static int __udp4_lib_mcast_deliver(struct sk_buff *skb,
int dif;
read_lock(&udp_hash_lock);
- sk = sk_head(&udptable[ntohs(uh->dest) & (UDP_HTABLE_SIZE - 1)]);
+ sk = sk_head(&udptable[udp_hashfn(net, ntohs(uh->dest))]);
dif = skb->dev->ifindex;
sk = udp_v4_mcast_next(sk, uh->dest, daddr, uh->source, saddr, dif);
if (sk) {
@@ -1158,6 +1163,7 @@ int __udp4_lib_rcv(struct sk_buff *skb, struct hlist_head udptable[],
struct rtable *rt = (struct rtable*)skb->dst;
__be32 saddr = ip_hdr(skb)->saddr;
__be32 daddr = ip_hdr(skb)->daddr;
+ struct net *net = dev_net(skb->dev);
/*
* Validate the packet.
@@ -1180,9 +1186,10 @@ int __udp4_lib_rcv(struct sk_buff *skb, struct hlist_head udptable[],
goto csum_error;
if (rt->rt_flags & (RTCF_BROADCAST|RTCF_MULTICAST))
- return __udp4_lib_mcast_deliver(skb, uh, saddr, daddr, udptable);
+ return __udp4_lib_mcast_deliver(net, skb, uh,
+ saddr, daddr, udptable);
- sk = __udp4_lib_lookup(dev_net(skb->dev), saddr, uh->source, daddr,
+ sk = __udp4_lib_lookup(net, saddr, uh->source, daddr,
uh->dest, inet_iif(skb), udptable);
if (sk != NULL) {
@@ -1211,7 +1218,7 @@ int __udp4_lib_rcv(struct sk_buff *skb, struct hlist_head udptable[],
if (udp_lib_checksum_complete(skb))
goto csum_error;
- UDP_INC_STATS_BH(UDP_MIB_NOPORTS, proto == IPPROTO_UDPLITE);
+ UDP_INC_STATS_BH(net, UDP_MIB_NOPORTS, proto == IPPROTO_UDPLITE);
icmp_send(skb, ICMP_DEST_UNREACH, ICMP_PORT_UNREACH, 0);
/*
@@ -1245,7 +1252,7 @@ csum_error:
ntohs(uh->dest),
ulen);
drop:
- UDP_INC_STATS_BH(UDP_MIB_INERRORS, proto == IPPROTO_UDPLITE);
+ UDP_INC_STATS_BH(net, UDP_MIB_INERRORS, proto == IPPROTO_UDPLITE);
kfree_skb(skb);
return 0;
}
@@ -1255,12 +1262,11 @@ int udp_rcv(struct sk_buff *skb)
return __udp4_lib_rcv(skb, udp_hash, IPPROTO_UDP);
}
-int udp_destroy_sock(struct sock *sk)
+void udp_destroy_sock(struct sock *sk)
{
lock_sock(sk);
udp_flush_pending_frames(sk);
release_sock(sk);
- return 0;
}
/*
@@ -1453,7 +1459,8 @@ unsigned int udp_poll(struct file *file, struct socket *sock, poll_table *wait)
spin_lock_bh(&rcvq->lock);
while ((skb = skb_peek(rcvq)) != NULL &&
udp_lib_checksum_complete(skb)) {
- UDP_INC_STATS_BH(UDP_MIB_INERRORS, is_lite);
+ UDP_INC_STATS_BH(sock_net(sk),
+ UDP_MIB_INERRORS, is_lite);
__skb_unlink(skb, rcvq);
kfree_skb(skb);
}
@@ -1629,12 +1636,13 @@ static void udp4_format_sock(struct sock *sp, struct seq_file *f,
__u16 srcp = ntohs(inet->sport);
seq_printf(f, "%4d: %08X:%04X %08X:%04X"
- " %02X %08X:%08X %02X:%08lX %08X %5d %8d %lu %d %p%n",
+ " %02X %08X:%08X %02X:%08lX %08X %5d %8d %lu %d %p %d%n",
bucket, src, srcp, dest, destp, sp->sk_state,
atomic_read(&sp->sk_wmem_alloc),
atomic_read(&sp->sk_rmem_alloc),
0, 0L, 0, sock_i_uid(sp), 0, sock_i_ino(sp),
- atomic_read(&sp->sk_refcnt), sp, len);
+ atomic_read(&sp->sk_refcnt), sp,
+ atomic_read(&sp->sk_drops), len);
}
int udp4_seq_show(struct seq_file *seq, void *v)
@@ -1643,7 +1651,7 @@ int udp4_seq_show(struct seq_file *seq, void *v)
seq_printf(seq, "%-127s\n",
" sl local_address rem_address st tx_queue "
"rx_queue tr tm->when retrnsmt uid timeout "
- "inode");
+ "inode ref pointer drops");
else {
struct udp_iter_state *state = seq->private;
int len;
diff --git a/net/ipv4/udp_impl.h b/net/ipv4/udp_impl.h
index 7288bf7977fb..2e9bad2fa1bc 100644
--- a/net/ipv4/udp_impl.h
+++ b/net/ipv4/udp_impl.h
@@ -26,7 +26,7 @@ extern int udp_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
extern int udp_sendpage(struct sock *sk, struct page *page, int offset,
size_t size, int flags);
extern int udp_queue_rcv_skb(struct sock * sk, struct sk_buff *skb);
-extern int udp_destroy_sock(struct sock *sk);
+extern void udp_destroy_sock(struct sock *sk);
#ifdef CONFIG_PROC_FS
extern int udp4_seq_show(struct seq_file *seq, void *v);
diff --git a/net/ipv4/udplite.c b/net/ipv4/udplite.c
index 72ce26b6c4d3..3c807964da96 100644
--- a/net/ipv4/udplite.c
+++ b/net/ipv4/udplite.c
@@ -1,8 +1,6 @@
/*
* UDPLITE An implementation of the UDP-Lite protocol (RFC 3828).
*
- * Version: $Id: udplite.c,v 1.25 2006/10/19 07:22:36 gerrit Exp $
- *
* Authors: Gerrit Renker <gerrit@erg.abdn.ac.uk>
*
* Changes:
@@ -13,7 +11,6 @@
* 2 of the License, or (at your option) any later version.
*/
#include "udp_impl.h"
-DEFINE_SNMP_STAT(struct udp_mib, udplite_statistics) __read_mostly;
struct hlist_head udplite_hash[UDP_HTABLE_SIZE];
diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c
index 147588f4c7c0..30184e0dd74c 100644
--- a/net/ipv6/addrconf.c
+++ b/net/ipv6/addrconf.c
@@ -6,8 +6,6 @@
* Pedro Roque <roque@di.fc.ul.pt>
* Alexey Kuznetsov <kuznet@ms2.inr.ac.ru>
*
- * $Id: addrconf.c,v 1.69 2001/10/31 21:55:54 davem Exp $
- *
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
* as published by the Free Software Foundation; either version
@@ -121,6 +119,7 @@ static void ipv6_regen_rndid(unsigned long data);
static int desync_factor = MAX_DESYNC_FACTOR * HZ;
#endif
+static int ipv6_generate_eui64(u8 *eui, struct net_device *dev);
static int ipv6_count_addresses(struct inet6_dev *idev);
/*
@@ -185,6 +184,8 @@ struct ipv6_devconf ipv6_devconf __read_mostly = {
#endif
.proxy_ndp = 0,
.accept_source_route = 0, /* we do not accept RH0 by default. */
+ .disable_ipv6 = 0,
+ .accept_dad = 1,
};
static struct ipv6_devconf ipv6_devconf_dflt __read_mostly = {
@@ -217,6 +218,8 @@ static struct ipv6_devconf ipv6_devconf_dflt __read_mostly = {
#endif
.proxy_ndp = 0,
.accept_source_route = 0, /* we do not accept RH0 by default. */
+ .disable_ipv6 = 0,
+ .accept_dad = 1,
};
/* IPv6 Wildcard Address and Loopback Address defined by RFC2553 */
@@ -226,9 +229,15 @@ const struct in6_addr in6addr_linklocal_allnodes = IN6ADDR_LINKLOCAL_ALLNODES_IN
const struct in6_addr in6addr_linklocal_allrouters = IN6ADDR_LINKLOCAL_ALLROUTERS_INIT;
/* Check if a valid qdisc is available */
-static inline int addrconf_qdisc_ok(struct net_device *dev)
+static inline bool addrconf_qdisc_ok(const struct net_device *dev)
{
- return (dev->qdisc != &noop_qdisc);
+ return !qdisc_tx_is_noop(dev);
+}
+
+/* Check if a route is a valid prefix route */
+static inline int addrconf_is_prefix_route(const struct rt6_info *rt)
+{
+ return ((rt->rt6i_flags & (RTF_GATEWAY | RTF_DEFAULT)) == 0);
}
static void addrconf_del_timer(struct inet6_ifaddr *ifp)
@@ -344,6 +353,8 @@ static struct inet6_dev * ipv6_add_dev(struct net_device *dev)
kfree(ndev);
return NULL;
}
+ if (ndev->cnf.forwarding)
+ dev_disable_lro(dev);
/* We refer to the device */
dev_hold(dev);
@@ -372,6 +383,9 @@ static struct inet6_dev * ipv6_add_dev(struct net_device *dev)
*/
in6_dev_hold(ndev);
+ if (dev->flags & (IFF_NOARP | IFF_LOOPBACK))
+ ndev->cnf.accept_dad = -1;
+
#if defined(CONFIG_IPV6_SIT) || defined(CONFIG_IPV6_SIT_MODULE)
if (dev->type == ARPHRD_SIT && (dev->priv_flags & IFF_ISATAP)) {
printk(KERN_INFO
@@ -438,6 +452,8 @@ static void dev_forward_change(struct inet6_dev *idev)
if (!idev)
return;
dev = idev->dev;
+ if (idev->cnf.forwarding)
+ dev_disable_lro(dev);
if (dev && (dev->flags & IFF_MULTICAST)) {
if (idev->cnf.forwarding)
ipv6_dev_mc_inc(dev, &in6addr_linklocal_allrouters);
@@ -483,12 +499,14 @@ static void addrconf_fixup_forwarding(struct ctl_table *table, int *p, int old)
if (p == &net->ipv6.devconf_dflt->forwarding)
return;
+ rtnl_lock();
if (p == &net->ipv6.devconf_all->forwarding) {
__s32 newf = net->ipv6.devconf_all->forwarding;
net->ipv6.devconf_dflt->forwarding = newf;
addrconf_forward_change(net, newf);
} else if ((!*p) ^ (!old))
dev_forward_change((struct inet6_dev *)table->extra1);
+ rtnl_unlock();
if (*p)
rt6_purge_dflt_routers(net);
@@ -568,6 +586,13 @@ ipv6_add_addr(struct inet6_dev *idev, const struct in6_addr *addr, int pfxlen,
struct rt6_info *rt;
int hash;
int err = 0;
+ int addr_type = ipv6_addr_type(addr);
+
+ if (addr_type == IPV6_ADDR_ANY ||
+ addr_type & IPV6_ADDR_MULTICAST ||
+ (!(idev->dev->flags & IFF_LOOPBACK) &&
+ addr_type & IPV6_ADDR_LOOPBACK))
+ return ERR_PTR(-EADDRNOTAVAIL);
rcu_read_lock_bh();
if (idev->dead) {
@@ -749,12 +774,12 @@ static void ipv6_del_addr(struct inet6_ifaddr *ifp)
}
write_unlock_bh(&idev->lock);
+ addrconf_del_timer(ifp);
+
ipv6_ifa_notify(RTM_DELADDR, ifp);
atomic_notifier_call_chain(&inet6addr_chain, NETDEV_DOWN, ifp);
- addrconf_del_timer(ifp);
-
/*
* Purge or update corresponding prefix
*
@@ -777,7 +802,7 @@ static void ipv6_del_addr(struct inet6_ifaddr *ifp)
ipv6_addr_prefix(&prefix, &ifp->addr, ifp->prefix_len);
rt = rt6_lookup(net, &prefix, NULL, ifp->idev->dev->ifindex, 1);
- if (rt && ((rt->rt6i_flags & (RTF_GATEWAY | RTF_DEFAULT)) == 0)) {
+ if (rt && addrconf_is_prefix_route(rt)) {
if (onlink == 0) {
ip6_del_rt(rt);
rt = NULL;
@@ -958,7 +983,8 @@ static inline int ipv6_saddr_preferred(int type)
return 0;
}
-static int ipv6_get_saddr_eval(struct ipv6_saddr_score *score,
+static int ipv6_get_saddr_eval(struct net *net,
+ struct ipv6_saddr_score *score,
struct ipv6_saddr_dst *dst,
int i)
{
@@ -1037,7 +1063,8 @@ static int ipv6_get_saddr_eval(struct ipv6_saddr_score *score,
break;
case IPV6_SADDR_RULE_LABEL:
/* Rule 6: Prefer matching label */
- ret = ipv6_addr_label(&score->ifa->addr, score->addr_type,
+ ret = ipv6_addr_label(net,
+ &score->ifa->addr, score->addr_type,
score->ifa->idev->dev->ifindex) == dst->label;
break;
#ifdef CONFIG_IPV6_PRIVACY
@@ -1091,7 +1118,7 @@ int ipv6_dev_get_saddr(struct net_device *dst_dev,
dst.addr = daddr;
dst.ifindex = dst_dev ? dst_dev->ifindex : 0;
dst.scope = __ipv6_addr_src_scope(dst_type);
- dst.label = ipv6_addr_label(daddr, dst_type, dst.ifindex);
+ dst.label = ipv6_addr_label(net, daddr, dst_type, dst.ifindex);
dst.prefs = prefs;
hiscore->rule = -1;
@@ -1159,8 +1186,8 @@ int ipv6_dev_get_saddr(struct net_device *dst_dev,
for (i = 0; i < IPV6_SADDR_RULE_MAX; i++) {
int minihiscore, miniscore;
- minihiscore = ipv6_get_saddr_eval(hiscore, &dst, i);
- miniscore = ipv6_get_saddr_eval(score, &dst, i);
+ minihiscore = ipv6_get_saddr_eval(net, hiscore, &dst, i);
+ miniscore = ipv6_get_saddr_eval(net, score, &dst, i);
if (minihiscore > miniscore) {
if (i == IPV6_SADDR_RULE_SCOPE &&
@@ -1400,6 +1427,20 @@ static void addrconf_dad_stop(struct inet6_ifaddr *ifp)
void addrconf_dad_failure(struct inet6_ifaddr *ifp)
{
+ struct inet6_dev *idev = ifp->idev;
+ if (idev->cnf.accept_dad > 1 && !idev->cnf.disable_ipv6) {
+ struct in6_addr addr;
+
+ addr.s6_addr32[0] = htonl(0xfe800000);
+ addr.s6_addr32[1] = 0;
+
+ if (!ipv6_generate_eui64(addr.s6_addr + 8, idev->dev) &&
+ ipv6_addr_equal(&ifp->addr, &addr)) {
+ /* DAD failed for link-local based on MAC address */
+ idev->cnf.disable_ipv6 = 1;
+ }
+ }
+
if (net_ratelimit())
printk(KERN_INFO "%s: duplicate address detected!\n", ifp->idev->dev->name);
addrconf_dad_stop(ifp);
@@ -1788,7 +1829,7 @@ void addrconf_prefix_rcv(struct net_device *dev, u8 *opt, int len)
rt = rt6_lookup(dev_net(dev), &pinfo->prefix, NULL,
dev->ifindex, 1);
- if (rt && ((rt->rt6i_flags & (RTF_GATEWAY | RTF_DEFAULT)) == 0)) {
+ if (rt && addrconf_is_prefix_route(rt)) {
/* Autoconf prefix route */
if (valid_lft == 0) {
ip6_del_rt(rt);
@@ -2732,6 +2773,7 @@ static void addrconf_dad_start(struct inet6_ifaddr *ifp, u32 flags)
spin_lock_bh(&ifp->lock);
if (dev->flags&(IFF_NOARP|IFF_LOOPBACK) ||
+ idev->cnf.accept_dad < 1 ||
!(ifp->flags&IFA_F_TENTATIVE) ||
ifp->flags & IFA_F_NODAD) {
ifp->flags &= ~(IFA_F_TENTATIVE|IFA_F_OPTIMISTIC);
@@ -2779,6 +2821,11 @@ static void addrconf_dad_timer(unsigned long data)
read_unlock_bh(&idev->lock);
goto out;
}
+ if (idev->cnf.accept_dad > 1 && idev->cnf.disable_ipv6) {
+ read_unlock_bh(&idev->lock);
+ addrconf_dad_failure(ifp);
+ return;
+ }
spin_lock_bh(&ifp->lock);
if (ifp->probes == 0) {
/*
@@ -3638,6 +3685,8 @@ static inline void ipv6_store_devconf(struct ipv6_devconf *cnf,
#ifdef CONFIG_IPV6_MROUTE
array[DEVCONF_MC_FORWARDING] = cnf->mc_forwarding;
#endif
+ array[DEVCONF_DISABLE_IPV6] = cnf->disable_ipv6;
+ array[DEVCONF_ACCEPT_DAD] = cnf->accept_dad;
}
static inline size_t inet6_if_nlmsg_size(void)
@@ -4197,6 +4246,22 @@ static struct addrconf_sysctl_table
},
#endif
{
+ .ctl_name = CTL_UNNUMBERED,
+ .procname = "disable_ipv6",
+ .data = &ipv6_devconf.disable_ipv6,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = &proc_dointvec,
+ },
+ {
+ .ctl_name = CTL_UNNUMBERED,
+ .procname = "accept_dad",
+ .data = &ipv6_devconf.accept_dad,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = &proc_dointvec,
+ },
+ {
.ctl_name = 0, /* sentinel */
}
},
diff --git a/net/ipv6/addrlabel.c b/net/ipv6/addrlabel.c
index 9bfa8846f262..08909039d87b 100644
--- a/net/ipv6/addrlabel.c
+++ b/net/ipv6/addrlabel.c
@@ -29,6 +29,9 @@
*/
struct ip6addrlbl_entry
{
+#ifdef CONFIG_NET_NS
+ struct net *lbl_net;
+#endif
struct in6_addr prefix;
int prefixlen;
int ifindex;
@@ -46,6 +49,16 @@ static struct ip6addrlbl_table
u32 seq;
} ip6addrlbl_table;
+static inline
+struct net *ip6addrlbl_net(const struct ip6addrlbl_entry *lbl)
+{
+#ifdef CONFIG_NET_NS
+ return lbl->lbl_net;
+#else
+ return &init_net;
+#endif
+}
+
/*
* Default policy table (RFC3484 + extensions)
*
@@ -65,7 +78,7 @@ static struct ip6addrlbl_table
#define IPV6_ADDR_LABEL_DEFAULT 0xffffffffUL
-static const __initdata struct ip6addrlbl_init_table
+static const __net_initdata struct ip6addrlbl_init_table
{
const struct in6_addr *prefix;
int prefixlen;
@@ -108,6 +121,9 @@ static const __initdata struct ip6addrlbl_init_table
/* Object management */
static inline void ip6addrlbl_free(struct ip6addrlbl_entry *p)
{
+#ifdef CONFIG_NET_NS
+ release_net(p->lbl_net);
+#endif
kfree(p);
}
@@ -128,10 +144,13 @@ static inline void ip6addrlbl_put(struct ip6addrlbl_entry *p)
}
/* Find label */
-static int __ip6addrlbl_match(struct ip6addrlbl_entry *p,
+static int __ip6addrlbl_match(struct net *net,
+ struct ip6addrlbl_entry *p,
const struct in6_addr *addr,
int addrtype, int ifindex)
{
+ if (!net_eq(ip6addrlbl_net(p), net))
+ return 0;
if (p->ifindex && p->ifindex != ifindex)
return 0;
if (p->addrtype && p->addrtype != addrtype)
@@ -141,19 +160,21 @@ static int __ip6addrlbl_match(struct ip6addrlbl_entry *p,
return 1;
}
-static struct ip6addrlbl_entry *__ipv6_addr_label(const struct in6_addr *addr,
+static struct ip6addrlbl_entry *__ipv6_addr_label(struct net *net,
+ const struct in6_addr *addr,
int type, int ifindex)
{
struct hlist_node *pos;
struct ip6addrlbl_entry *p;
hlist_for_each_entry_rcu(p, pos, &ip6addrlbl_table.head, list) {
- if (__ip6addrlbl_match(p, addr, type, ifindex))
+ if (__ip6addrlbl_match(net, p, addr, type, ifindex))
return p;
}
return NULL;
}
-u32 ipv6_addr_label(const struct in6_addr *addr, int type, int ifindex)
+u32 ipv6_addr_label(struct net *net,
+ const struct in6_addr *addr, int type, int ifindex)
{
u32 label;
struct ip6addrlbl_entry *p;
@@ -161,7 +182,7 @@ u32 ipv6_addr_label(const struct in6_addr *addr, int type, int ifindex)
type &= IPV6_ADDR_MAPPED | IPV6_ADDR_COMPATv4 | IPV6_ADDR_LOOPBACK;
rcu_read_lock();
- p = __ipv6_addr_label(addr, type, ifindex);
+ p = __ipv6_addr_label(net, addr, type, ifindex);
label = p ? p->label : IPV6_ADDR_LABEL_DEFAULT;
rcu_read_unlock();
@@ -174,7 +195,8 @@ u32 ipv6_addr_label(const struct in6_addr *addr, int type, int ifindex)
}
/* allocate one entry */
-static struct ip6addrlbl_entry *ip6addrlbl_alloc(const struct in6_addr *prefix,
+static struct ip6addrlbl_entry *ip6addrlbl_alloc(struct net *net,
+ const struct in6_addr *prefix,
int prefixlen, int ifindex,
u32 label)
{
@@ -216,6 +238,9 @@ static struct ip6addrlbl_entry *ip6addrlbl_alloc(const struct in6_addr *prefix,
newp->addrtype = addrtype;
newp->label = label;
INIT_HLIST_NODE(&newp->list);
+#ifdef CONFIG_NET_NS
+ newp->lbl_net = hold_net(net);
+#endif
atomic_set(&newp->refcnt, 1);
return newp;
}
@@ -237,6 +262,7 @@ static int __ip6addrlbl_add(struct ip6addrlbl_entry *newp, int replace)
hlist_for_each_entry_safe(p, pos, n,
&ip6addrlbl_table.head, list) {
if (p->prefixlen == newp->prefixlen &&
+ net_eq(ip6addrlbl_net(p), ip6addrlbl_net(newp)) &&
p->ifindex == newp->ifindex &&
ipv6_addr_equal(&p->prefix, &newp->prefix)) {
if (!replace) {
@@ -261,7 +287,8 @@ out:
}
/* add a label */
-static int ip6addrlbl_add(const struct in6_addr *prefix, int prefixlen,
+static int ip6addrlbl_add(struct net *net,
+ const struct in6_addr *prefix, int prefixlen,
int ifindex, u32 label, int replace)
{
struct ip6addrlbl_entry *newp;
@@ -274,7 +301,7 @@ static int ip6addrlbl_add(const struct in6_addr *prefix, int prefixlen,
(unsigned int)label,
replace);
- newp = ip6addrlbl_alloc(prefix, prefixlen, ifindex, label);
+ newp = ip6addrlbl_alloc(net, prefix, prefixlen, ifindex, label);
if (IS_ERR(newp))
return PTR_ERR(newp);
spin_lock(&ip6addrlbl_table.lock);
@@ -286,7 +313,8 @@ static int ip6addrlbl_add(const struct in6_addr *prefix, int prefixlen,
}
/* remove a label */
-static int __ip6addrlbl_del(const struct in6_addr *prefix, int prefixlen,
+static int __ip6addrlbl_del(struct net *net,
+ const struct in6_addr *prefix, int prefixlen,
int ifindex)
{
struct ip6addrlbl_entry *p = NULL;
@@ -300,6 +328,7 @@ static int __ip6addrlbl_del(const struct in6_addr *prefix, int prefixlen,
hlist_for_each_entry_safe(p, pos, n, &ip6addrlbl_table.head, list) {
if (p->prefixlen == prefixlen &&
+ net_eq(ip6addrlbl_net(p), net) &&
p->ifindex == ifindex &&
ipv6_addr_equal(&p->prefix, prefix)) {
hlist_del_rcu(&p->list);
@@ -311,7 +340,8 @@ static int __ip6addrlbl_del(const struct in6_addr *prefix, int prefixlen,
return ret;
}
-static int ip6addrlbl_del(const struct in6_addr *prefix, int prefixlen,
+static int ip6addrlbl_del(struct net *net,
+ const struct in6_addr *prefix, int prefixlen,
int ifindex)
{
struct in6_addr prefix_buf;
@@ -324,13 +354,13 @@ static int ip6addrlbl_del(const struct in6_addr *prefix, int prefixlen,
ipv6_addr_prefix(&prefix_buf, prefix, prefixlen);
spin_lock(&ip6addrlbl_table.lock);
- ret = __ip6addrlbl_del(&prefix_buf, prefixlen, ifindex);
+ ret = __ip6addrlbl_del(net, &prefix_buf, prefixlen, ifindex);
spin_unlock(&ip6addrlbl_table.lock);
return ret;
}
/* add default label */
-static __init int ip6addrlbl_init(void)
+static int __net_init ip6addrlbl_net_init(struct net *net)
{
int err = 0;
int i;
@@ -338,7 +368,8 @@ static __init int ip6addrlbl_init(void)
ADDRLABEL(KERN_DEBUG "%s()\n", __func__);
for (i = 0; i < ARRAY_SIZE(ip6addrlbl_init_table); i++) {
- int ret = ip6addrlbl_add(ip6addrlbl_init_table[i].prefix,
+ int ret = ip6addrlbl_add(net,
+ ip6addrlbl_init_table[i].prefix,
ip6addrlbl_init_table[i].prefixlen,
0,
ip6addrlbl_init_table[i].label, 0);
@@ -349,11 +380,32 @@ static __init int ip6addrlbl_init(void)
return err;
}
+static void __net_exit ip6addrlbl_net_exit(struct net *net)
+{
+ struct ip6addrlbl_entry *p = NULL;
+ struct hlist_node *pos, *n;
+
+ /* Remove all labels belonging to the exiting net */
+ spin_lock(&ip6addrlbl_table.lock);
+ hlist_for_each_entry_safe(p, pos, n, &ip6addrlbl_table.head, list) {
+ if (net_eq(ip6addrlbl_net(p), net)) {
+ hlist_del_rcu(&p->list);
+ ip6addrlbl_put(p);
+ }
+ }
+ spin_unlock(&ip6addrlbl_table.lock);
+}
+
+static struct pernet_operations ipv6_addr_label_ops = {
+ .init = ip6addrlbl_net_init,
+ .exit = ip6addrlbl_net_exit,
+};
+
int __init ipv6_addr_label_init(void)
{
spin_lock_init(&ip6addrlbl_table.lock);
- return ip6addrlbl_init();
+ return register_pernet_subsys(&ipv6_addr_label_ops);
}
static const struct nla_policy ifal_policy[IFAL_MAX+1] = {
@@ -371,9 +423,6 @@ static int ip6addrlbl_newdel(struct sk_buff *skb, struct nlmsghdr *nlh,
u32 label;
int err = 0;
- if (net != &init_net)
- return 0;
-
err = nlmsg_parse(nlh, sizeof(*ifal), tb, IFAL_MAX, ifal_policy);
if (err < 0)
return err;
@@ -385,7 +434,7 @@ static int ip6addrlbl_newdel(struct sk_buff *skb, struct nlmsghdr *nlh,
return -EINVAL;
if (ifal->ifal_index &&
- !__dev_get_by_index(&init_net, ifal->ifal_index))
+ !__dev_get_by_index(net, ifal->ifal_index))
return -EINVAL;
if (!tb[IFAL_ADDRESS])
@@ -403,12 +452,12 @@ static int ip6addrlbl_newdel(struct sk_buff *skb, struct nlmsghdr *nlh,
switch(nlh->nlmsg_type) {
case RTM_NEWADDRLABEL:
- err = ip6addrlbl_add(pfx, ifal->ifal_prefixlen,
+ err = ip6addrlbl_add(net, pfx, ifal->ifal_prefixlen,
ifal->ifal_index, label,
nlh->nlmsg_flags & NLM_F_REPLACE);
break;
case RTM_DELADDRLABEL:
- err = ip6addrlbl_del(pfx, ifal->ifal_prefixlen,
+ err = ip6addrlbl_del(net, pfx, ifal->ifal_prefixlen,
ifal->ifal_index);
break;
default:
@@ -458,12 +507,10 @@ static int ip6addrlbl_dump(struct sk_buff *skb, struct netlink_callback *cb)
int idx = 0, s_idx = cb->args[0];
int err;
- if (net != &init_net)
- return 0;
-
rcu_read_lock();
hlist_for_each_entry_rcu(p, pos, &ip6addrlbl_table.head, list) {
- if (idx >= s_idx) {
+ if (idx >= s_idx &&
+ net_eq(ip6addrlbl_net(p), net)) {
if ((err = ip6addrlbl_fill(skb, p,
ip6addrlbl_table.seq,
NETLINK_CB(cb->skb).pid,
@@ -499,9 +546,6 @@ static int ip6addrlbl_get(struct sk_buff *in_skb, struct nlmsghdr* nlh,
struct ip6addrlbl_entry *p;
struct sk_buff *skb;
- if (net != &init_net)
- return 0;
-
err = nlmsg_parse(nlh, sizeof(*ifal), tb, IFAL_MAX, ifal_policy);
if (err < 0)
return err;
@@ -513,7 +557,7 @@ static int ip6addrlbl_get(struct sk_buff *in_skb, struct nlmsghdr* nlh,
return -EINVAL;
if (ifal->ifal_index &&
- !__dev_get_by_index(&init_net, ifal->ifal_index))
+ !__dev_get_by_index(net, ifal->ifal_index))
return -EINVAL;
if (!tb[IFAL_ADDRESS])
@@ -524,7 +568,7 @@ static int ip6addrlbl_get(struct sk_buff *in_skb, struct nlmsghdr* nlh,
return -EINVAL;
rcu_read_lock();
- p = __ipv6_addr_label(addr, ipv6_addr_type(addr), ifal->ifal_index);
+ p = __ipv6_addr_label(net, addr, ipv6_addr_type(addr), ifal->ifal_index);
if (p && ip6addrlbl_hold(p))
p = NULL;
lseq = ip6addrlbl_table.seq;
@@ -552,7 +596,7 @@ static int ip6addrlbl_get(struct sk_buff *in_skb, struct nlmsghdr* nlh,
goto out;
}
- err = rtnl_unicast(skb, &init_net, NETLINK_CB(in_skb).pid);
+ err = rtnl_unicast(skb, net, NETLINK_CB(in_skb).pid);
out:
return err;
}
diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c
index e84b3fd17fb4..3d828bc4b1cf 100644
--- a/net/ipv6/af_inet6.c
+++ b/net/ipv6/af_inet6.c
@@ -7,8 +7,6 @@
*
* Adapted from linux/net/ipv4/af_inet.c
*
- * $Id: af_inet6.c,v 1.66 2002/02/01 22:01:04 davem Exp $
- *
* Fixes:
* piggy, Karl Knutson : Socket protocol table
* Hideaki YOSHIFUJI : sin6_scope_id support
@@ -61,9 +59,7 @@
#include <asm/uaccess.h>
#include <asm/system.h>
-#ifdef CONFIG_IPV6_MROUTE
#include <linux/mroute6.h>
-#endif
MODULE_AUTHOR("Cast of dozens");
MODULE_DESCRIPTION("IPv6 protocol stack for Linux");
@@ -373,7 +369,7 @@ int inet6_release(struct socket *sock)
EXPORT_SYMBOL(inet6_release);
-int inet6_destroy_sock(struct sock *sk)
+void inet6_destroy_sock(struct sock *sk)
{
struct ipv6_pinfo *np = inet6_sk(sk);
struct sk_buff *skb;
@@ -391,8 +387,6 @@ int inet6_destroy_sock(struct sock *sk)
if ((opt = xchg(&np->opt, NULL)) != NULL)
sock_kfree_s(sk, opt, opt->tot_len);
-
- return 0;
}
EXPORT_SYMBOL_GPL(inet6_destroy_sock);
@@ -956,9 +950,9 @@ static int __init inet6_init(void)
err = icmpv6_init();
if (err)
goto icmp_fail;
-#ifdef CONFIG_IPV6_MROUTE
- ip6_mr_init();
-#endif
+ err = ip6_mr_init();
+ if (err)
+ goto ipmr_fail;
err = ndisc_init();
if (err)
goto ndisc_fail;
@@ -1061,6 +1055,8 @@ netfilter_fail:
igmp_fail:
ndisc_cleanup();
ndisc_fail:
+ ip6_mr_cleanup();
+ipmr_fail:
icmpv6_cleanup();
icmp_fail:
unregister_pernet_subsys(&inet6_net_ops);
@@ -1115,6 +1111,7 @@ static void __exit inet6_exit(void)
ipv6_netfilter_fini();
igmp6_cleanup();
ndisc_cleanup();
+ ip6_mr_cleanup();
icmpv6_cleanup();
rawv6_exit();
diff --git a/net/ipv6/datagram.c b/net/ipv6/datagram.c
index 0f0f94a40335..f7b535dec860 100644
--- a/net/ipv6/datagram.c
+++ b/net/ipv6/datagram.c
@@ -5,8 +5,6 @@
* Authors:
* Pedro Roque <roque@di.fc.ul.pt>
*
- * $Id: datagram.c,v 1.24 2002/02/01 22:01:04 davem Exp $
- *
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
* as published by the Free Software Foundation; either version
diff --git a/net/ipv6/exthdrs.c b/net/ipv6/exthdrs.c
index 3cd1c993d52b..9f1084b4c0e8 100644
--- a/net/ipv6/exthdrs.c
+++ b/net/ipv6/exthdrs.c
@@ -7,8 +7,6 @@
* Andi Kleen <ak@muc.de>
* Alexey Kuznetsov <kuznet@ms2.inr.ac.ru>
*
- * $Id: exthdrs.c,v 1.13 2001/06/19 15:58:56 davem Exp $
- *
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
* as published by the Free Software Foundation; either version
@@ -445,7 +443,7 @@ looped_back:
kfree_skb(skb);
return -1;
}
- if (!ipv6_chk_home_addr(&init_net, addr)) {
+ if (!ipv6_chk_home_addr(dev_net(skb->dst->dev), addr)) {
IP6_INC_STATS_BH(ip6_dst_idev(skb->dst),
IPSTATS_MIB_INADDRERRORS);
kfree_skb(skb);
diff --git a/net/ipv6/icmp.c b/net/ipv6/icmp.c
index d42dd16d3487..abedf95fdf2d 100644
--- a/net/ipv6/icmp.c
+++ b/net/ipv6/icmp.c
@@ -5,8 +5,6 @@
* Authors:
* Pedro Roque <roque@di.fc.ul.pt>
*
- * $Id: icmp.c,v 1.38 2002/02/08 03:57:19 davem Exp $
- *
* Based on net/ipv4/icmp.c
*
* RFC 1885
@@ -956,7 +954,8 @@ ctl_table ipv6_icmp_table_template[] = {
.data = &init_net.ipv6.sysctl.icmpv6_time,
.maxlen = sizeof(int),
.mode = 0644,
- .proc_handler = &proc_dointvec
+ .proc_handler = &proc_dointvec_ms_jiffies,
+ .strategy = &sysctl_ms_jiffies
},
{ .ctl_name = 0 },
};
diff --git a/net/ipv6/inet6_hashtables.c b/net/ipv6/inet6_hashtables.c
index 580014aea4d6..00a8a5f9380c 100644
--- a/net/ipv6/inet6_hashtables.c
+++ b/net/ipv6/inet6_hashtables.c
@@ -68,7 +68,7 @@ struct sock *__inet6_lookup_established(struct net *net,
/* Optimize here for direct hit, only listening connections can
* have wildcards anyways.
*/
- unsigned int hash = inet6_ehashfn(daddr, hnum, saddr, sport);
+ unsigned int hash = inet6_ehashfn(net, daddr, hnum, saddr, sport);
struct inet_ehash_bucket *head = inet_ehash_bucket(hashinfo, hash);
rwlock_t *lock = inet_ehash_lockp(hashinfo, hash);
@@ -104,7 +104,8 @@ struct sock *inet6_lookup_listener(struct net *net,
int score, hiscore = 0;
read_lock(&hashinfo->lhash_lock);
- sk_for_each(sk, node, &hashinfo->listening_hash[inet_lhashfn(hnum)]) {
+ sk_for_each(sk, node,
+ &hashinfo->listening_hash[inet_lhashfn(net, hnum)]) {
if (net_eq(sock_net(sk), net) && inet_sk(sk)->num == hnum &&
sk->sk_family == PF_INET6) {
const struct ipv6_pinfo *np = inet6_sk(sk);
@@ -165,14 +166,14 @@ static int __inet6_check_established(struct inet_timewait_death_row *death_row,
const struct in6_addr *saddr = &np->daddr;
const int dif = sk->sk_bound_dev_if;
const __portpair ports = INET_COMBINED_PORTS(inet->dport, lport);
- const unsigned int hash = inet6_ehashfn(daddr, lport, saddr,
+ struct net *net = sock_net(sk);
+ const unsigned int hash = inet6_ehashfn(net, daddr, lport, saddr,
inet->dport);
struct inet_ehash_bucket *head = inet_ehash_bucket(hinfo, hash);
rwlock_t *lock = inet_ehash_lockp(hinfo, hash);
struct sock *sk2;
const struct hlist_node *node;
struct inet_timewait_sock *tw;
- struct net *net = sock_net(sk);
prefetch(head->chain.first);
write_lock(lock);
@@ -209,11 +210,11 @@ unique:
if (twp != NULL) {
*twp = tw;
- NET_INC_STATS_BH(LINUX_MIB_TIMEWAITRECYCLED);
+ NET_INC_STATS_BH(twsk_net(tw), LINUX_MIB_TIMEWAITRECYCLED);
} else if (tw != NULL) {
/* Silly. Should hash-dance instead... */
inet_twsk_deschedule(tw, death_row);
- NET_INC_STATS_BH(LINUX_MIB_TIMEWAITRECYCLED);
+ NET_INC_STATS_BH(twsk_net(tw), LINUX_MIB_TIMEWAITRECYCLED);
inet_twsk_put(tw);
}
diff --git a/net/ipv6/ip6_fib.c b/net/ipv6/ip6_fib.c
index 1ee4fa17c129..4de2b9efcacb 100644
--- a/net/ipv6/ip6_fib.c
+++ b/net/ipv6/ip6_fib.c
@@ -5,8 +5,6 @@
* Authors:
* Pedro Roque <roque@di.fc.ul.pt>
*
- * $Id: ip6_fib.c,v 1.25 2001/10/31 21:55:55 davem Exp $
- *
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
* as published by the Free Software Foundation; either version
diff --git a/net/ipv6/ip6_input.c b/net/ipv6/ip6_input.c
index 17eb48b8e329..ea81c614dde2 100644
--- a/net/ipv6/ip6_input.c
+++ b/net/ipv6/ip6_input.c
@@ -6,8 +6,6 @@
* Pedro Roque <roque@di.fc.ul.pt>
* Ian P. Morris <I.P.Morris@soton.ac.uk>
*
- * $Id: ip6_input.c,v 1.19 2000/12/13 18:31:50 davem Exp $
- *
* Based in linux/net/ipv4/ip_input.c
*
* This program is free software; you can redistribute it and/or
@@ -73,7 +71,8 @@ int ipv6_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt
IP6_INC_STATS_BH(idev, IPSTATS_MIB_INRECEIVES);
- if ((skb = skb_share_check(skb, GFP_ATOMIC)) == NULL) {
+ if ((skb = skb_share_check(skb, GFP_ATOMIC)) == NULL ||
+ !idev || unlikely(idev->cnf.disable_ipv6)) {
IP6_INC_STATS_BH(idev, IPSTATS_MIB_INDISCARDS);
rcu_read_unlock();
goto out;
diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c
index 48cdce9c696c..0981c1ef3057 100644
--- a/net/ipv6/ip6_output.c
+++ b/net/ipv6/ip6_output.c
@@ -5,8 +5,6 @@
* Authors:
* Pedro Roque <roque@di.fc.ul.pt>
*
- * $Id: ip6_output.c,v 1.34 2002/02/01 22:01:04 davem Exp $
- *
* Based on linux/net/ipv4/ip_output.c
*
* This program is free software; you can redistribute it and/or
@@ -175,6 +173,13 @@ static inline int ip6_skb_dst_mtu(struct sk_buff *skb)
int ip6_output(struct sk_buff *skb)
{
+ struct inet6_dev *idev = ip6_dst_idev(skb->dst);
+ if (unlikely(idev->cnf.disable_ipv6)) {
+ IP6_INC_STATS(idev, IPSTATS_MIB_OUTDISCARDS);
+ kfree_skb(skb);
+ return 0;
+ }
+
if ((skb->len > ip6_skb_dst_mtu(skb) && !skb_is_gso(skb)) ||
dst_allfrag(skb->dst))
return ip6_fragment(skb, ip6_output2);
@@ -409,6 +414,9 @@ int ip6_forward(struct sk_buff *skb)
if (ipv6_devconf.forwarding == 0)
goto error;
+ if (skb_warn_if_lro(skb))
+ goto drop;
+
if (!xfrm6_policy_check(NULL, XFRM_POLICY_FWD, skb)) {
IP6_INC_STATS(ip6_dst_idev(dst), IPSTATS_MIB_INDISCARDS);
goto drop;
@@ -497,7 +505,8 @@ int ip6_forward(struct sk_buff *skb)
int addrtype = ipv6_addr_type(&hdr->saddr);
/* This check is security critical. */
- if (addrtype & (IPV6_ADDR_MULTICAST|IPV6_ADDR_LOOPBACK))
+ if (addrtype == IPV6_ADDR_ANY ||
+ addrtype & (IPV6_ADDR_MULTICAST | IPV6_ADDR_LOOPBACK))
goto error;
if (addrtype & IPV6_ADDR_LINKLOCAL) {
icmpv6_send(skb, ICMPV6_DEST_UNREACH,
diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c
index 2bda3ba100b1..17c7b098cdb0 100644
--- a/net/ipv6/ip6_tunnel.c
+++ b/net/ipv6/ip6_tunnel.c
@@ -6,8 +6,6 @@
* Ville Nuorvala <vnuorval@tcs.hut.fi>
* Yasuyuki Kozakai <kozakai@linux-ipv6.org>
*
- * $Id$
- *
* Based on:
* linux/net/ipv6/sit.c and linux/net/ipv4/ipip.c
*
@@ -711,7 +709,7 @@ static int ip6_tnl_rcv(struct sk_buff *skb, __u16 protocol,
}
if (!ip6_tnl_rcv_ctl(t)) {
- t->stat.rx_dropped++;
+ t->dev->stats.rx_dropped++;
read_unlock(&ip6_tnl_lock);
goto discard;
}
@@ -728,8 +726,8 @@ static int ip6_tnl_rcv(struct sk_buff *skb, __u16 protocol,
dscp_ecn_decapsulate(t, ipv6h, skb);
- t->stat.rx_packets++;
- t->stat.rx_bytes += skb->len;
+ t->dev->stats.rx_packets++;
+ t->dev->stats.rx_bytes += skb->len;
netif_rx(skb);
read_unlock(&ip6_tnl_lock);
return 0;
@@ -849,7 +847,7 @@ static int ip6_tnl_xmit2(struct sk_buff *skb,
__u32 *pmtu)
{
struct ip6_tnl *t = netdev_priv(dev);
- struct net_device_stats *stats = &t->stat;
+ struct net_device_stats *stats = &t->dev->stats;
struct ipv6hdr *ipv6h = ipv6_hdr(skb);
struct ipv6_tel_txoption opt;
struct dst_entry *dst;
@@ -1043,11 +1041,11 @@ static int
ip6_tnl_xmit(struct sk_buff *skb, struct net_device *dev)
{
struct ip6_tnl *t = netdev_priv(dev);
- struct net_device_stats *stats = &t->stat;
+ struct net_device_stats *stats = &t->dev->stats;
int ret;
if (t->recursion++) {
- t->stat.collisions++;
+ stats->collisions++;
goto tx_err;
}
@@ -1289,19 +1287,6 @@ ip6_tnl_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
}
/**
- * ip6_tnl_get_stats - return the stats for tunnel device
- * @dev: virtual device associated with tunnel
- *
- * Return: stats for device
- **/
-
-static struct net_device_stats *
-ip6_tnl_get_stats(struct net_device *dev)
-{
- return &(((struct ip6_tnl *)netdev_priv(dev))->stat);
-}
-
-/**
* ip6_tnl_change_mtu - change mtu manually for tunnel device
* @dev: virtual device associated with tunnel
* @new_mtu: the new mtu
@@ -1334,7 +1319,6 @@ static void ip6_tnl_dev_setup(struct net_device *dev)
dev->uninit = ip6_tnl_dev_uninit;
dev->destructor = free_netdev;
dev->hard_start_xmit = ip6_tnl_xmit;
- dev->get_stats = ip6_tnl_get_stats;
dev->do_ioctl = ip6_tnl_ioctl;
dev->change_mtu = ip6_tnl_change_mtu;
diff --git a/net/ipv6/ip6mr.c b/net/ipv6/ip6mr.c
index 14796181e8b5..0b41aa2675f5 100644
--- a/net/ipv6/ip6mr.c
+++ b/net/ipv6/ip6mr.c
@@ -388,8 +388,8 @@ static int pim6_rcv(struct sk_buff *skb)
skb->ip_summed = 0;
skb->pkt_type = PACKET_HOST;
dst_release(skb->dst);
- ((struct net_device_stats *)netdev_priv(reg_dev))->rx_bytes += skb->len;
- ((struct net_device_stats *)netdev_priv(reg_dev))->rx_packets++;
+ reg_dev->stats.rx_bytes += skb->len;
+ reg_dev->stats.rx_packets++;
skb->dst = NULL;
nf_reset(skb);
netif_rx(skb);
@@ -409,26 +409,20 @@ static struct inet6_protocol pim6_protocol = {
static int reg_vif_xmit(struct sk_buff *skb, struct net_device *dev)
{
read_lock(&mrt_lock);
- ((struct net_device_stats *)netdev_priv(dev))->tx_bytes += skb->len;
- ((struct net_device_stats *)netdev_priv(dev))->tx_packets++;
+ dev->stats.tx_bytes += skb->len;
+ dev->stats.tx_packets++;
ip6mr_cache_report(skb, reg_vif_num, MRT6MSG_WHOLEPKT);
read_unlock(&mrt_lock);
kfree_skb(skb);
return 0;
}
-static struct net_device_stats *reg_vif_get_stats(struct net_device *dev)
-{
- return (struct net_device_stats *)netdev_priv(dev);
-}
-
static void reg_vif_setup(struct net_device *dev)
{
dev->type = ARPHRD_PIMREG;
dev->mtu = 1500 - sizeof(struct ipv6hdr) - 8;
dev->flags = IFF_NOARP;
dev->hard_start_xmit = reg_vif_xmit;
- dev->get_stats = reg_vif_get_stats;
dev->destructor = free_netdev;
}
@@ -436,9 +430,7 @@ static struct net_device *ip6mr_reg_vif(void)
{
struct net_device *dev;
- dev = alloc_netdev(sizeof(struct net_device_stats), "pim6reg",
- reg_vif_setup);
-
+ dev = alloc_netdev(0, "pim6reg", reg_vif_setup);
if (dev == NULL)
return NULL;
@@ -451,6 +443,7 @@ static struct net_device *ip6mr_reg_vif(void)
if (dev_open(dev))
goto failure;
+ dev_hold(dev);
return dev;
failure:
@@ -603,6 +596,7 @@ static int mif6_add(struct mif6ctl *vifc, int mrtsock)
int vifi = vifc->mif6c_mifi;
struct mif_device *v = &vif6_table[vifi];
struct net_device *dev;
+ int err;
/* Is vif busy ? */
if (MIF_EXISTS(vifi))
@@ -620,20 +614,28 @@ static int mif6_add(struct mif6ctl *vifc, int mrtsock)
dev = ip6mr_reg_vif();
if (!dev)
return -ENOBUFS;
+ err = dev_set_allmulti(dev, 1);
+ if (err) {
+ unregister_netdevice(dev);
+ dev_put(dev);
+ return err;
+ }
break;
#endif
case 0:
dev = dev_get_by_index(&init_net, vifc->mif6c_pifi);
if (!dev)
return -EADDRNOTAVAIL;
- dev_put(dev);
+ err = dev_set_allmulti(dev, 1);
+ if (err) {
+ dev_put(dev);
+ return err;
+ }
break;
default:
return -EINVAL;
}
- dev_set_allmulti(dev, 1);
-
/*
* Fill in the VIF structures
*/
@@ -652,7 +654,6 @@ static int mif6_add(struct mif6ctl *vifc, int mrtsock)
/* And finish update writing critical data */
write_lock_bh(&mrt_lock);
- dev_hold(dev);
v->dev = dev;
#ifdef CONFIG_IPV6_PIMSM_V2
if (v->flags & MIFF_REGISTER)
@@ -956,23 +957,51 @@ static struct notifier_block ip6_mr_notifier = {
* Setup for IP multicast routing
*/
-void __init ip6_mr_init(void)
+int __init ip6_mr_init(void)
{
+ int err;
+
mrt_cachep = kmem_cache_create("ip6_mrt_cache",
sizeof(struct mfc6_cache),
0, SLAB_HWCACHE_ALIGN,
NULL);
if (!mrt_cachep)
- panic("cannot allocate ip6_mrt_cache");
+ return -ENOMEM;
setup_timer(&ipmr_expire_timer, ipmr_expire_process, 0);
- register_netdevice_notifier(&ip6_mr_notifier);
+ err = register_netdevice_notifier(&ip6_mr_notifier);
+ if (err)
+ goto reg_notif_fail;
+#ifdef CONFIG_PROC_FS
+ err = -ENOMEM;
+ if (!proc_net_fops_create(&init_net, "ip6_mr_vif", 0, &ip6mr_vif_fops))
+ goto proc_vif_fail;
+ if (!proc_net_fops_create(&init_net, "ip6_mr_cache",
+ 0, &ip6mr_mfc_fops))
+ goto proc_cache_fail;
+#endif
+ return 0;
#ifdef CONFIG_PROC_FS
- proc_net_fops_create(&init_net, "ip6_mr_vif", 0, &ip6mr_vif_fops);
- proc_net_fops_create(&init_net, "ip6_mr_cache", 0, &ip6mr_mfc_fops);
+proc_cache_fail: /* unwind strictly in reverse order of the setup above */
+ proc_net_remove(&init_net, "ip6_mr_vif");
+proc_vif_fail:
+ unregister_netdevice_notifier(&ip6_mr_notifier);
#endif
+reg_notif_fail:
+ kmem_cache_destroy(mrt_cachep);
+ return err;
}
+void ip6_mr_cleanup(void)
+{
+#ifdef CONFIG_PROC_FS
+ proc_net_remove(&init_net, "ip6_mr_cache");
+ proc_net_remove(&init_net, "ip6_mr_vif");
+#endif
+ unregister_netdevice_notifier(&ip6_mr_notifier);
+ del_timer(&ipmr_expire_timer);
+ kmem_cache_destroy(mrt_cachep);
+}
static int ip6mr_mfc_add(struct mf6cctl *mfc, int mrtsock)
{
@@ -1248,7 +1277,7 @@ int ip6_mroute_setsockopt(struct sock *sk, int optname, char __user *optval, int
#endif
/*
- * Spurious command, or MRT_VERSION which you cannot
+ * Spurious command, or MRT6_VERSION which you cannot
* set.
*/
default:
@@ -1377,8 +1406,8 @@ static int ip6mr_forward2(struct sk_buff *skb, struct mfc6_cache *c, int vifi)
if (vif->flags & MIFF_REGISTER) {
vif->pkt_out++;
vif->bytes_out += skb->len;
- ((struct net_device_stats *)netdev_priv(vif->dev))->tx_bytes += skb->len;
- ((struct net_device_stats *)netdev_priv(vif->dev))->tx_packets++;
+ vif->dev->stats.tx_bytes += skb->len;
+ vif->dev->stats.tx_packets++;
ip6mr_cache_report(skb, vifi, MRT6MSG_WHOLEPKT);
kfree_skb(skb);
return 0;
diff --git a/net/ipv6/ipv6_sockglue.c b/net/ipv6/ipv6_sockglue.c
index 86e28a75267f..8c6ea07f4d56 100644
--- a/net/ipv6/ipv6_sockglue.c
+++ b/net/ipv6/ipv6_sockglue.c
@@ -7,8 +7,6 @@
*
* Based on linux/net/ipv4/ip_sockglue.c
*
- * $Id: ipv6_sockglue.c,v 1.41 2002/02/01 22:01:04 davem Exp $
- *
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
* as published by the Free Software Foundation; either version
@@ -61,7 +59,7 @@ DEFINE_SNMP_STAT(struct ipstats_mib, ipv6_statistics) __read_mostly;
struct ip6_ra_chain *ip6_ra_chain;
DEFINE_RWLOCK(ip6_ra_lock);
-int ip6_ra_control(struct sock *sk, int sel, void (*destructor)(struct sock *))
+int ip6_ra_control(struct sock *sk, int sel)
{
struct ip6_ra_chain *ra, *new_ra, **rap;
@@ -83,8 +81,6 @@ int ip6_ra_control(struct sock *sk, int sel, void (*destructor)(struct sock *))
*rap = ra->next;
write_unlock_bh(&ip6_ra_lock);
- if (ra->destructor)
- ra->destructor(sk);
sock_put(sk);
kfree(ra);
return 0;
@@ -96,7 +92,6 @@ int ip6_ra_control(struct sock *sk, int sel, void (*destructor)(struct sock *))
}
new_ra->sk = sk;
new_ra->sel = sel;
- new_ra->destructor = destructor;
new_ra->next = ra;
*rap = new_ra;
sock_hold(sk);
@@ -634,7 +629,7 @@ done:
case IPV6_ROUTER_ALERT:
if (optlen < sizeof(int))
goto e_inval;
- retv = ip6_ra_control(sk, val, NULL);
+ retv = ip6_ra_control(sk, val);
break;
case IPV6_MTU_DISCOVER:
if (optlen < sizeof(int))
diff --git a/net/ipv6/mcast.c b/net/ipv6/mcast.c
index fd632dd7f98d..bd2fe4cfafa7 100644
--- a/net/ipv6/mcast.c
+++ b/net/ipv6/mcast.c
@@ -5,8 +5,6 @@
* Authors:
* Pedro Roque <roque@di.fc.ul.pt>
*
- * $Id: mcast.c,v 1.40 2002/02/08 03:57:19 davem Exp $
- *
* Based on linux/ipv4/igmp.c and linux/ipv4/ip_sockglue.c
*
* This program is free software; you can redistribute it and/or
@@ -164,7 +162,6 @@ static int ip6_mc_leave_src(struct sock *sk, struct ipv6_mc_socklist *iml,
((MLDV2_MASK(value, nbmant) | (1<<(nbmant))) << \
(MLDV2_MASK((value) >> (nbmant), nbexp) + (nbexp))))
-#define MLDV2_QQIC(value) MLDV2_EXP(0x80, 4, 3, value)
#define MLDV2_MRC(value) MLDV2_EXP(0x8000, 12, 3, value)
#define IPV6_MLD_MAX_MSF 64
diff --git a/net/ipv6/netfilter/Kconfig b/net/ipv6/netfilter/Kconfig
index 6cae5475737e..689dec899c57 100644
--- a/net/ipv6/netfilter/Kconfig
+++ b/net/ipv6/netfilter/Kconfig
@@ -208,5 +208,17 @@ config IP6_NF_RAW
If you want to compile it as a module, say M here and read
<file:Documentation/kbuild/modules.txt>. If unsure, say `N'.
+# security table for MAC policy
+config IP6_NF_SECURITY
+ tristate "Security table"
+ depends on IP6_NF_IPTABLES
+ depends on SECURITY
+ default m if NETFILTER_ADVANCED=n
+ help
+ This option adds a `security' table to iptables, for use
+ with Mandatory Access Control (MAC) policy.
+
+ If unsure, say N.
+
endmenu
diff --git a/net/ipv6/netfilter/Makefile b/net/ipv6/netfilter/Makefile
index fbf2c14ed887..3f17c948eefb 100644
--- a/net/ipv6/netfilter/Makefile
+++ b/net/ipv6/netfilter/Makefile
@@ -8,6 +8,7 @@ obj-$(CONFIG_IP6_NF_FILTER) += ip6table_filter.o
obj-$(CONFIG_IP6_NF_MANGLE) += ip6table_mangle.o
obj-$(CONFIG_IP6_NF_QUEUE) += ip6_queue.o
obj-$(CONFIG_IP6_NF_RAW) += ip6table_raw.o
+obj-$(CONFIG_IP6_NF_SECURITY) += ip6table_security.o
# objects for l3 independent conntrack
nf_conntrack_ipv6-objs := nf_conntrack_l3proto_ipv6.o nf_conntrack_proto_icmpv6.o nf_conntrack_reasm.o
diff --git a/net/ipv6/netfilter/ip6_queue.c b/net/ipv6/netfilter/ip6_queue.c
index 2eff3ae8977d..1b8815f6153d 100644
--- a/net/ipv6/netfilter/ip6_queue.c
+++ b/net/ipv6/netfilter/ip6_queue.c
@@ -159,7 +159,6 @@ ipq_build_packet_message(struct nf_queue_entry *entry, int *errp)
case IPQ_COPY_META:
case IPQ_COPY_NONE:
size = NLMSG_SPACE(sizeof(*pmsg));
- data_len = 0;
break;
case IPQ_COPY_PACKET:
@@ -226,8 +225,6 @@ ipq_build_packet_message(struct nf_queue_entry *entry, int *errp)
return skb;
nlmsg_failure:
- if (skb)
- kfree_skb(skb);
*errp = -EINVAL;
printk(KERN_ERR "ip6_queue: error creating packet message\n");
return NULL;
diff --git a/net/ipv6/netfilter/ip6table_filter.c b/net/ipv6/netfilter/ip6table_filter.c
index f979e48b469b..55a2c290bad4 100644
--- a/net/ipv6/netfilter/ip6table_filter.c
+++ b/net/ipv6/netfilter/ip6table_filter.c
@@ -61,13 +61,25 @@ static struct xt_table packet_filter = {
/* The work comes in here from netfilter.c. */
static unsigned int
-ip6t_hook(unsigned int hook,
- struct sk_buff *skb,
- const struct net_device *in,
- const struct net_device *out,
- int (*okfn)(struct sk_buff *))
+ip6t_local_in_hook(unsigned int hook,
+ struct sk_buff *skb,
+ const struct net_device *in,
+ const struct net_device *out,
+ int (*okfn)(struct sk_buff *))
+{
+ return ip6t_do_table(skb, hook, in, out,
+ nf_local_in_net(in, out)->ipv6.ip6table_filter);
+}
+
+static unsigned int
+ip6t_forward_hook(unsigned int hook,
+ struct sk_buff *skb,
+ const struct net_device *in,
+ const struct net_device *out,
+ int (*okfn)(struct sk_buff *))
{
- return ip6t_do_table(skb, hook, in, out, init_net.ipv6.ip6table_filter);
+ return ip6t_do_table(skb, hook, in, out,
+ nf_forward_net(in, out)->ipv6.ip6table_filter);
}
static unsigned int
@@ -87,19 +99,20 @@ ip6t_local_out_hook(unsigned int hook,
}
#endif
- return ip6t_do_table(skb, hook, in, out, init_net.ipv6.ip6table_filter);
+ return ip6t_do_table(skb, hook, in, out,
+ nf_local_out_net(in, out)->ipv6.ip6table_filter);
}
static struct nf_hook_ops ip6t_ops[] __read_mostly = {
{
- .hook = ip6t_hook,
+ .hook = ip6t_local_in_hook,
.owner = THIS_MODULE,
.pf = PF_INET6,
.hooknum = NF_INET_LOCAL_IN,
.priority = NF_IP6_PRI_FILTER,
},
{
- .hook = ip6t_hook,
+ .hook = ip6t_forward_hook,
.owner = THIS_MODULE,
.pf = PF_INET6,
.hooknum = NF_INET_FORWARD,
diff --git a/net/ipv6/netfilter/ip6table_security.c b/net/ipv6/netfilter/ip6table_security.c
new file mode 100644
index 000000000000..a07abee30497
--- /dev/null
+++ b/net/ipv6/netfilter/ip6table_security.c
@@ -0,0 +1,172 @@
+/*
+ * "security" table for IPv6
+ *
+ * This is for use by Mandatory Access Control (MAC) security models,
+ * which need to be able to manage security policy in separate context
+ * to DAC.
+ *
+ * Based on iptable_mangle.c
+ *
+ * Copyright (C) 1999 Paul `Rusty' Russell & Michael J. Neuling
+ * Copyright (C) 2000-2004 Netfilter Core Team <coreteam <at> netfilter.org>
+ * Copyright (C) 2008 Red Hat, Inc., James Morris <jmorris <at> redhat.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+#include <linux/module.h>
+#include <linux/netfilter_ipv6/ip6_tables.h>
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("James Morris <jmorris <at> redhat.com>");
+MODULE_DESCRIPTION("ip6tables security table, for MAC rules");
+
+#define SECURITY_VALID_HOOKS ((1 << NF_INET_LOCAL_IN) | \
+ (1 << NF_INET_FORWARD) | \
+ (1 << NF_INET_LOCAL_OUT))
+
+static struct
+{
+ struct ip6t_replace repl;
+ struct ip6t_standard entries[3];
+ struct ip6t_error term;
+} initial_table __initdata = {
+ .repl = {
+ .name = "security",
+ .valid_hooks = SECURITY_VALID_HOOKS,
+ .num_entries = 4,
+ .size = sizeof(struct ip6t_standard) * 3 + sizeof(struct ip6t_error),
+ .hook_entry = {
+ [NF_INET_LOCAL_IN] = 0,
+ [NF_INET_FORWARD] = sizeof(struct ip6t_standard),
+ [NF_INET_LOCAL_OUT] = sizeof(struct ip6t_standard) * 2,
+ },
+ .underflow = {
+ [NF_INET_LOCAL_IN] = 0,
+ [NF_INET_FORWARD] = sizeof(struct ip6t_standard),
+ [NF_INET_LOCAL_OUT] = sizeof(struct ip6t_standard) * 2,
+ },
+ },
+ .entries = {
+ IP6T_STANDARD_INIT(NF_ACCEPT), /* LOCAL_IN */
+ IP6T_STANDARD_INIT(NF_ACCEPT), /* FORWARD */
+ IP6T_STANDARD_INIT(NF_ACCEPT), /* LOCAL_OUT */
+ },
+ .term = IP6T_ERROR_INIT, /* ERROR */
+};
+
+static struct xt_table security_table = {
+ .name = "security",
+ .valid_hooks = SECURITY_VALID_HOOKS,
+ .lock = __RW_LOCK_UNLOCKED(security_table.lock),
+ .me = THIS_MODULE,
+ .af = AF_INET6,
+};
+
+static unsigned int
+ip6t_local_in_hook(unsigned int hook,
+ struct sk_buff *skb,
+ const struct net_device *in,
+ const struct net_device *out,
+ int (*okfn)(struct sk_buff *))
+{
+ return ip6t_do_table(skb, hook, in, out,
+ nf_local_in_net(in, out)->ipv6.ip6table_security);
+}
+
+static unsigned int
+ip6t_forward_hook(unsigned int hook,
+ struct sk_buff *skb,
+ const struct net_device *in,
+ const struct net_device *out,
+ int (*okfn)(struct sk_buff *))
+{
+ return ip6t_do_table(skb, hook, in, out,
+ nf_forward_net(in, out)->ipv6.ip6table_security);
+}
+
+static unsigned int
+ip6t_local_out_hook(unsigned int hook,
+ struct sk_buff *skb,
+ const struct net_device *in,
+ const struct net_device *out,
+ int (*okfn)(struct sk_buff *))
+{
+ /* TBD: handle short packets via raw socket */
+ return ip6t_do_table(skb, hook, in, out,
+ nf_local_out_net(in, out)->ipv6.ip6table_security);
+}
+
+static struct nf_hook_ops ip6t_ops[] __read_mostly = {
+ {
+ .hook = ip6t_local_in_hook,
+ .owner = THIS_MODULE,
+ .pf = PF_INET6,
+ .hooknum = NF_INET_LOCAL_IN,
+ .priority = NF_IP6_PRI_SECURITY,
+ },
+ {
+ .hook = ip6t_forward_hook,
+ .owner = THIS_MODULE,
+ .pf = PF_INET6,
+ .hooknum = NF_INET_FORWARD,
+ .priority = NF_IP6_PRI_SECURITY,
+ },
+ {
+ .hook = ip6t_local_out_hook,
+ .owner = THIS_MODULE,
+ .pf = PF_INET6,
+ .hooknum = NF_INET_LOCAL_OUT,
+ .priority = NF_IP6_PRI_SECURITY,
+ },
+};
+
+static int __net_init ip6table_security_net_init(struct net *net)
+{
+ net->ipv6.ip6table_security =
+ ip6t_register_table(net, &security_table, &initial_table.repl);
+
+ if (IS_ERR(net->ipv6.ip6table_security))
+ return PTR_ERR(net->ipv6.ip6table_security);
+
+ return 0;
+}
+
+static void __net_exit ip6table_security_net_exit(struct net *net)
+{
+ ip6t_unregister_table(net->ipv6.ip6table_security);
+}
+
+static struct pernet_operations ip6table_security_net_ops = {
+ .init = ip6table_security_net_init,
+ .exit = ip6table_security_net_exit,
+};
+
+static int __init ip6table_security_init(void)
+{
+ int ret;
+
+ ret = register_pernet_subsys(&ip6table_security_net_ops);
+ if (ret < 0)
+ return ret;
+
+ ret = nf_register_hooks(ip6t_ops, ARRAY_SIZE(ip6t_ops));
+ if (ret < 0)
+ goto cleanup_table;
+
+ return ret;
+
+cleanup_table:
+ unregister_pernet_subsys(&ip6table_security_net_ops);
+ return ret;
+}
+
+static void __exit ip6table_security_fini(void)
+{
+ nf_unregister_hooks(ip6t_ops, ARRAY_SIZE(ip6t_ops));
+ unregister_pernet_subsys(&ip6table_security_net_ops);
+}
+
+module_init(ip6table_security_init);
+module_exit(ip6table_security_fini);
diff --git a/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c b/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c
index ee713b03e9ec..14d47d833545 100644
--- a/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c
+++ b/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c
@@ -89,9 +89,8 @@ static int icmpv6_packet(struct nf_conn *ct,
means this will only run once even if count hits zero twice
(theoretically possible with SMP) */
if (CTINFO2DIR(ctinfo) == IP_CT_DIR_REPLY) {
- if (atomic_dec_and_test(&ct->proto.icmp.count)
- && del_timer(&ct->timeout))
- ct->timeout.function((unsigned long)ct);
+ if (atomic_dec_and_test(&ct->proto.icmp.count))
+ nf_ct_kill_acct(ct, ctinfo, skb);
} else {
atomic_inc(&ct->proto.icmp.count);
nf_conntrack_event_cache(IPCT_PROTOINFO_VOLATILE, skb);
diff --git a/net/ipv6/proc.c b/net/ipv6/proc.c
index df0736a4cafa..70940b3654a1 100644
--- a/net/ipv6/proc.c
+++ b/net/ipv6/proc.c
@@ -7,8 +7,6 @@
* PROC file system. This is very similar to the IPv4 version,
* except it reports the sockets in the INET6 address family.
*
- * Version: $Id: proc.c,v 1.17 2002/02/01 22:01:04 davem Exp $
- *
* Authors: David S. Miller (davem@caip.rutgers.edu)
* YOSHIFUJI Hideaki <yoshfuji@linux-ipv6.org>
*
@@ -185,32 +183,7 @@ static int snmp6_seq_show(struct seq_file *seq, void *v)
static int sockstat6_seq_open(struct inode *inode, struct file *file)
{
- int err;
- struct net *net;
-
- err = -ENXIO;
- net = get_proc_net(inode);
- if (net == NULL)
- goto err_net;
-
- err = single_open(file, sockstat6_seq_show, net);
- if (err < 0)
- goto err_open;
-
- return 0;
-
-err_open:
- put_net(net);
-err_net:
- return err;
-}
-
-static int sockstat6_seq_release(struct inode *inode, struct file *file)
-{
- struct net *net = ((struct seq_file *)file->private_data)->private;
-
- put_net(net);
- return single_release(inode, file);
+ return single_open_net(inode, file, sockstat6_seq_show);
}
static const struct file_operations sockstat6_seq_fops = {
@@ -218,7 +191,7 @@ static const struct file_operations sockstat6_seq_fops = {
.open = sockstat6_seq_open,
.read = seq_read,
.llseek = seq_lseek,
- .release = sockstat6_seq_release,
+ .release = single_release_net,
};
static int snmp6_seq_open(struct inode *inode, struct file *file)
diff --git a/net/ipv6/protocol.c b/net/ipv6/protocol.c
index f929f47b925e..9ab789159913 100644
--- a/net/ipv6/protocol.c
+++ b/net/ipv6/protocol.c
@@ -5,8 +5,6 @@
*
* PF_INET6 protocol dispatch tables.
*
- * Version: $Id: protocol.c,v 1.10 2001/05/18 02:25:49 davem Exp $
- *
* Authors: Pedro Roque <roque@di.fc.ul.pt>
*
* This program is free software; you can redistribute it and/or
diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c
index 3aee12310d94..01d47674f7e5 100644
--- a/net/ipv6/raw.c
+++ b/net/ipv6/raw.c
@@ -7,8 +7,6 @@
*
* Adapted from linux/net/ipv4/raw.c
*
- * $Id: raw.c,v 1.51 2002/02/01 22:01:04 davem Exp $
- *
* Fixes:
* Hideaki YOSHIFUJI : sin6_scope_id support
* YOSHIFUJI,H.@USAGI : raw checksum (RFC2292(bis) compliance)
@@ -1159,18 +1157,18 @@ static int rawv6_ioctl(struct sock *sk, int cmd, unsigned long arg)
static void rawv6_close(struct sock *sk, long timeout)
{
if (inet_sk(sk)->num == IPPROTO_RAW)
- ip6_ra_control(sk, -1, NULL);
+ ip6_ra_control(sk, -1);
ip6mr_sk_done(sk);
sk_common_release(sk);
}
-static int raw6_destroy(struct sock *sk)
+static void raw6_destroy(struct sock *sk)
{
lock_sock(sk);
ip6_flush_pending_frames(sk);
release_sock(sk);
- return inet6_destroy_sock(sk);
+ inet6_destroy_sock(sk);
}
static int rawv6_init_sk(struct sock *sk)
@@ -1253,7 +1251,7 @@ static int raw6_seq_show(struct seq_file *seq, void *v)
"local_address "
"remote_address "
"st tx_queue rx_queue tr tm->when retrnsmt"
- " uid timeout inode drops\n");
+ " uid timeout inode ref pointer drops\n");
else
raw6_sock_seq_show(seq, v, raw_seq_private(seq)->bucket);
return 0;
diff --git a/net/ipv6/reassembly.c b/net/ipv6/reassembly.c
index a60d7d129713..6ab957ec2dd6 100644
--- a/net/ipv6/reassembly.c
+++ b/net/ipv6/reassembly.c
@@ -5,8 +5,6 @@
* Authors:
* Pedro Roque <roque@di.fc.ul.pt>
*
- * $Id: reassembly.c,v 1.26 2001/03/07 22:00:57 davem Exp $
- *
* Based on: net/ipv4/ip_fragment.c
*
* This program is free software; you can redistribute it and/or
@@ -634,7 +632,7 @@ static struct inet6_protocol frag_protocol =
};
#ifdef CONFIG_SYSCTL
-static struct ctl_table ip6_frags_ctl_table[] = {
+static struct ctl_table ip6_frags_ns_ctl_table[] = {
{
.ctl_name = NET_IPV6_IP6FRAG_HIGH_THRESH,
.procname = "ip6frag_high_thresh",
@@ -660,6 +658,10 @@ static struct ctl_table ip6_frags_ctl_table[] = {
.proc_handler = &proc_dointvec_jiffies,
.strategy = &sysctl_jiffies,
},
+ { }
+};
+
+static struct ctl_table ip6_frags_ctl_table[] = {
{
.ctl_name = NET_IPV6_IP6FRAG_SECRET_INTERVAL,
.procname = "ip6frag_secret_interval",
@@ -672,21 +674,20 @@ static struct ctl_table ip6_frags_ctl_table[] = {
{ }
};
-static int ip6_frags_sysctl_register(struct net *net)
+static int ip6_frags_ns_sysctl_register(struct net *net)
{
struct ctl_table *table;
struct ctl_table_header *hdr;
- table = ip6_frags_ctl_table;
+ table = ip6_frags_ns_ctl_table;
if (net != &init_net) {
- table = kmemdup(table, sizeof(ip6_frags_ctl_table), GFP_KERNEL);
+ table = kmemdup(table, sizeof(ip6_frags_ns_ctl_table), GFP_KERNEL);
if (table == NULL)
goto err_alloc;
table[0].data = &net->ipv6.frags.high_thresh;
table[1].data = &net->ipv6.frags.low_thresh;
table[2].data = &net->ipv6.frags.timeout;
- table[3].mode &= ~0222;
}
hdr = register_net_sysctl_table(net, net_ipv6_ctl_path, table);
@@ -703,7 +704,7 @@ err_alloc:
return -ENOMEM;
}
-static void ip6_frags_sysctl_unregister(struct net *net)
+static void ip6_frags_ns_sysctl_unregister(struct net *net)
{
struct ctl_table *table;
@@ -711,13 +712,36 @@ static void ip6_frags_sysctl_unregister(struct net *net)
unregister_net_sysctl_table(net->ipv6.sysctl.frags_hdr);
kfree(table);
}
+
+static struct ctl_table_header *ip6_ctl_header;
+
+static int ip6_frags_sysctl_register(void)
+{
+ ip6_ctl_header = register_net_sysctl_rotable(net_ipv6_ctl_path,
+ ip6_frags_ctl_table);
+ return ip6_ctl_header == NULL ? -ENOMEM : 0;
+}
+
+static void ip6_frags_sysctl_unregister(void)
+{
+ unregister_net_sysctl_table(ip6_ctl_header);
+}
#else
-static inline int ip6_frags_sysctl_register(struct net *net)
+static inline int ip6_frags_ns_sysctl_register(struct net *net)
{
return 0;
}
-static inline void ip6_frags_sysctl_unregister(struct net *net)
+static inline void ip6_frags_ns_sysctl_unregister(struct net *net)
+{
+}
+
+static inline int ip6_frags_sysctl_register(void)
+{
+ return 0;
+}
+
+static inline void ip6_frags_sysctl_unregister(void)
{
}
#endif
@@ -730,12 +754,12 @@ static int ipv6_frags_init_net(struct net *net)
inet_frags_init_net(&net->ipv6.frags);
- return ip6_frags_sysctl_register(net);
+ return ip6_frags_ns_sysctl_register(net);
}
static void ipv6_frags_exit_net(struct net *net)
{
- ip6_frags_sysctl_unregister(net);
+ ip6_frags_ns_sysctl_unregister(net);
inet_frags_exit_net(&net->ipv6.frags, &ip6_frags);
}
@@ -752,7 +776,13 @@ int __init ipv6_frag_init(void)
if (ret)
goto out;
- register_pernet_subsys(&ip6_frags_ops);
+ ret = ip6_frags_sysctl_register();
+ if (ret)
+ goto err_sysctl;
+
+ ret = register_pernet_subsys(&ip6_frags_ops);
+ if (ret)
+ goto err_pernet;
ip6_frags.hashfn = ip6_hashfn;
ip6_frags.constructor = ip6_frag_init;
@@ -765,11 +795,18 @@ int __init ipv6_frag_init(void)
inet_frags_init(&ip6_frags);
out:
return ret;
+
+err_pernet:
+ ip6_frags_sysctl_unregister();
+err_sysctl:
+ inet6_del_protocol(&frag_protocol, IPPROTO_FRAGMENT);
+ goto out;
}
void ipv6_frag_exit(void)
{
inet_frags_fini(&ip6_frags);
+ ip6_frags_sysctl_unregister();
unregister_pernet_subsys(&ip6_frags_ops);
inet6_del_protocol(&frag_protocol, IPPROTO_FRAGMENT);
}
diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index 7ff687020fa9..cb8a51271b67 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -5,8 +5,6 @@
* Authors:
* Pedro Roque <roque@di.fc.ul.pt>
*
- * $Id: route.c,v 1.56 2001/10/31 21:55:55 davem Exp $
- *
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
* as published by the Free Software Foundation; either version
@@ -230,7 +228,7 @@ static __inline__ int rt6_check_expired(const struct rt6_info *rt)
static inline int rt6_need_strict(struct in6_addr *daddr)
{
return (ipv6_addr_type(daddr) &
- (IPV6_ADDR_MULTICAST | IPV6_ADDR_LINKLOCAL));
+ (IPV6_ADDR_MULTICAST | IPV6_ADDR_LINKLOCAL | IPV6_ADDR_LOOPBACK));
}
/*
@@ -239,15 +237,20 @@ static inline int rt6_need_strict(struct in6_addr *daddr)
static inline struct rt6_info *rt6_device_match(struct net *net,
struct rt6_info *rt,
+ struct in6_addr *saddr,
int oif,
int flags)
{
struct rt6_info *local = NULL;
struct rt6_info *sprt;
- if (oif) {
- for (sprt = rt; sprt; sprt = sprt->u.dst.rt6_next) {
- struct net_device *dev = sprt->rt6i_dev;
+ if (!oif && ipv6_addr_any(saddr))
+ goto out;
+
+ for (sprt = rt; sprt; sprt = sprt->u.dst.rt6_next) {
+ struct net_device *dev = sprt->rt6i_dev;
+
+ if (oif) {
if (dev->ifindex == oif)
return sprt;
if (dev->flags & IFF_LOOPBACK) {
@@ -261,14 +264,21 @@ static inline struct rt6_info *rt6_device_match(struct net *net,
}
local = sprt;
}
+ } else {
+ if (ipv6_chk_addr(net, saddr, dev,
+ flags & RT6_LOOKUP_F_IFACE))
+ return sprt;
}
+ }
+ if (oif) {
if (local)
return local;
if (flags & RT6_LOOKUP_F_IFACE)
return net->ipv6.ip6_null_entry;
}
+out:
return rt;
}
@@ -541,7 +551,7 @@ static struct rt6_info *ip6_pol_route_lookup(struct net *net,
fn = fib6_lookup(&table->tb6_root, &fl->fl6_dst, &fl->fl6_src);
restart:
rt = fn->leaf;
- rt = rt6_device_match(net, rt, fl->oif, flags);
+ rt = rt6_device_match(net, rt, &fl->fl6_src, fl->oif, flags);
BACKTRACK(net, &fl->fl6_src);
out:
dst_use(&rt->u.dst, jiffies);
@@ -2406,26 +2416,7 @@ static int ipv6_route_show(struct seq_file *m, void *v)
static int ipv6_route_open(struct inode *inode, struct file *file)
{
- int err;
- struct net *net = get_proc_net(inode);
- if (!net)
- return -ENXIO;
-
- err = single_open(file, ipv6_route_show, net);
- if (err < 0) {
- put_net(net);
- return err;
- }
-
- return 0;
-}
-
-static int ipv6_route_release(struct inode *inode, struct file *file)
-{
- struct seq_file *seq = file->private_data;
- struct net *net = seq->private;
- put_net(net);
- return single_release(inode, file);
+ return single_open_net(inode, file, ipv6_route_show);
}
static const struct file_operations ipv6_route_proc_fops = {
@@ -2433,7 +2424,7 @@ static const struct file_operations ipv6_route_proc_fops = {
.open = ipv6_route_open,
.read = seq_read,
.llseek = seq_lseek,
- .release = ipv6_route_release,
+ .release = single_release_net,
};
static int rt6_stats_seq_show(struct seq_file *seq, void *v)
@@ -2453,26 +2444,7 @@ static int rt6_stats_seq_show(struct seq_file *seq, void *v)
static int rt6_stats_seq_open(struct inode *inode, struct file *file)
{
- int err;
- struct net *net = get_proc_net(inode);
- if (!net)
- return -ENXIO;
-
- err = single_open(file, rt6_stats_seq_show, net);
- if (err < 0) {
- put_net(net);
- return err;
- }
-
- return 0;
-}
-
-static int rt6_stats_seq_release(struct inode *inode, struct file *file)
-{
- struct seq_file *seq = file->private_data;
- struct net *net = (struct net *)seq->private;
- put_net(net);
- return single_release(inode, file);
+ return single_open_net(inode, file, rt6_stats_seq_show);
}
static const struct file_operations rt6_stats_seq_fops = {
@@ -2480,7 +2452,7 @@ static const struct file_operations rt6_stats_seq_fops = {
.open = rt6_stats_seq_open,
.read = seq_read,
.llseek = seq_lseek,
- .release = rt6_stats_seq_release,
+ .release = single_release_net,
};
#endif /* CONFIG_PROC_FS */
diff --git a/net/ipv6/sit.c b/net/ipv6/sit.c
index 32e871a6c25a..b7a50e968506 100644
--- a/net/ipv6/sit.c
+++ b/net/ipv6/sit.c
@@ -6,8 +6,6 @@
* Pedro Roque <roque@di.fc.ul.pt>
* Alexey Kuznetsov <kuznet@ms2.inr.ac.ru>
*
- * $Id: sit.c,v 1.53 2001/09/25 05:09:53 davem Exp $
- *
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
* as published by the Free Software Foundation; either version
@@ -493,13 +491,13 @@ static int ipip6_rcv(struct sk_buff *skb)
if ((tunnel->dev->priv_flags & IFF_ISATAP) &&
!isatap_chksrc(skb, iph, tunnel)) {
- tunnel->stat.rx_errors++;
+ tunnel->dev->stats.rx_errors++;
read_unlock(&ipip6_lock);
kfree_skb(skb);
return 0;
}
- tunnel->stat.rx_packets++;
- tunnel->stat.rx_bytes += skb->len;
+ tunnel->dev->stats.rx_packets++;
+ tunnel->dev->stats.rx_bytes += skb->len;
skb->dev = tunnel->dev;
dst_release(skb->dst);
skb->dst = NULL;
@@ -539,7 +537,7 @@ static inline __be32 try_6to4(struct in6_addr *v6dst)
static int ipip6_tunnel_xmit(struct sk_buff *skb, struct net_device *dev)
{
struct ip_tunnel *tunnel = netdev_priv(dev);
- struct net_device_stats *stats = &tunnel->stat;
+ struct net_device_stats *stats = &tunnel->dev->stats;
struct iphdr *tiph = &tunnel->parms.iph;
struct ipv6hdr *iph6 = ipv6_hdr(skb);
u8 tos = tunnel->parms.iph.tos;
@@ -553,7 +551,7 @@ static int ipip6_tunnel_xmit(struct sk_buff *skb, struct net_device *dev)
int addr_type;
if (tunnel->recursion++) {
- tunnel->stat.collisions++;
+ stats->collisions++;
goto tx_error;
}
@@ -620,20 +618,20 @@ static int ipip6_tunnel_xmit(struct sk_buff *skb, struct net_device *dev)
.oif = tunnel->parms.link,
.proto = IPPROTO_IPV6 };
if (ip_route_output_key(dev_net(dev), &rt, &fl)) {
- tunnel->stat.tx_carrier_errors++;
+ stats->tx_carrier_errors++;
goto tx_error_icmp;
}
}
if (rt->rt_type != RTN_UNICAST) {
ip_rt_put(rt);
- tunnel->stat.tx_carrier_errors++;
+ stats->tx_carrier_errors++;
goto tx_error_icmp;
}
tdev = rt->u.dst.dev;
if (tdev == dev) {
ip_rt_put(rt);
- tunnel->stat.collisions++;
+ stats->collisions++;
goto tx_error;
}
@@ -643,7 +641,7 @@ static int ipip6_tunnel_xmit(struct sk_buff *skb, struct net_device *dev)
mtu = skb->dst ? dst_mtu(skb->dst) : dev->mtu;
if (mtu < 68) {
- tunnel->stat.collisions++;
+ stats->collisions++;
ip_rt_put(rt);
goto tx_error;
}
@@ -920,11 +918,6 @@ done:
return err;
}
-static struct net_device_stats *ipip6_tunnel_get_stats(struct net_device *dev)
-{
- return &(((struct ip_tunnel*)netdev_priv(dev))->stat);
-}
-
static int ipip6_tunnel_change_mtu(struct net_device *dev, int new_mtu)
{
if (new_mtu < IPV6_MIN_MTU || new_mtu > 0xFFF8 - sizeof(struct iphdr))
@@ -938,7 +931,6 @@ static void ipip6_tunnel_setup(struct net_device *dev)
dev->uninit = ipip6_tunnel_uninit;
dev->destructor = free_netdev;
dev->hard_start_xmit = ipip6_tunnel_xmit;
- dev->get_stats = ipip6_tunnel_get_stats;
dev->do_ioctl = ipip6_tunnel_ioctl;
dev->change_mtu = ipip6_tunnel_change_mtu;
diff --git a/net/ipv6/syncookies.c b/net/ipv6/syncookies.c
index 3ecc1157994e..6a68eeb7bbf8 100644
--- a/net/ipv6/syncookies.c
+++ b/net/ipv6/syncookies.c
@@ -137,7 +137,7 @@ __u32 cookie_v6_init_sequence(struct sock *sk, struct sk_buff *skb, __u16 *mssp)
;
*mssp = msstab[mssind] + 1;
- NET_INC_STATS_BH(LINUX_MIB_SYNCOOKIESSENT);
+ NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_SYNCOOKIESSENT);
return secure_tcp_syn_cookie(&iph->saddr, &iph->daddr, th->source,
th->dest, ntohl(th->seq),
@@ -177,11 +177,11 @@ struct sock *cookie_v6_check(struct sock *sk, struct sk_buff *skb)
if (time_after(jiffies, tp->last_synq_overflow + TCP_TIMEOUT_INIT) ||
(mss = cookie_check(skb, cookie)) == 0) {
- NET_INC_STATS_BH(LINUX_MIB_SYNCOOKIESFAILED);
+ NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_SYNCOOKIESFAILED);
goto out;
}
- NET_INC_STATS_BH(LINUX_MIB_SYNCOOKIESRECV);
+ NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_SYNCOOKIESRECV);
/* check for timestamp cookie support */
memset(&tcp_opt, 0, sizeof(tcp_opt));
diff --git a/net/ipv6/sysctl_net_ipv6.c b/net/ipv6/sysctl_net_ipv6.c
index 3804dcbbfab0..5c99274558bf 100644
--- a/net/ipv6/sysctl_net_ipv6.c
+++ b/net/ipv6/sysctl_net_ipv6.c
@@ -37,6 +37,10 @@ static ctl_table ipv6_table_template[] = {
.mode = 0644,
.proc_handler = &proc_dointvec
},
+ { .ctl_name = 0 }
+};
+
+static ctl_table ipv6_table[] = {
{
.ctl_name = NET_IPV6_MLD_MAX_MSF,
.procname = "mld_max_msf",
@@ -80,12 +84,6 @@ static int ipv6_sysctl_net_init(struct net *net)
ipv6_table[2].data = &net->ipv6.sysctl.bindv6only;
- /* We don't want this value to be per namespace, it should be global
- to all namespaces, so make it read-only when we are not in the
- init network namespace */
- if (net != &init_net)
- ipv6_table[3].mode = 0444;
-
net->ipv6.sysctl.table = register_net_sysctl_table(net, net_ipv6_ctl_path,
ipv6_table);
if (!net->ipv6.sysctl.table)
@@ -126,12 +124,29 @@ static struct pernet_operations ipv6_sysctl_net_ops = {
.exit = ipv6_sysctl_net_exit,
};
+static struct ctl_table_header *ip6_header;
+
int ipv6_sysctl_register(void)
{
- return register_pernet_subsys(&ipv6_sysctl_net_ops);
+ int err = -ENOMEM;
+
+ ip6_header = register_net_sysctl_rotable(net_ipv6_ctl_path, ipv6_table);
+ if (ip6_header == NULL)
+ goto out;
+
+ err = register_pernet_subsys(&ipv6_sysctl_net_ops);
+ if (err)
+ goto err_pernet;
+out:
+ return err;
+
+err_pernet:
+ unregister_net_sysctl_table(ip6_header);
+ goto out;
}
void ipv6_sysctl_unregister(void)
{
+ unregister_net_sysctl_table(ip6_header);
unregister_pernet_subsys(&ipv6_sysctl_net_ops);
}
diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
index 40ea9c36d24b..ae45f9835014 100644
--- a/net/ipv6/tcp_ipv6.c
+++ b/net/ipv6/tcp_ipv6.c
@@ -5,8 +5,6 @@
* Authors:
* Pedro Roque <roque@di.fc.ul.pt>
*
- * $Id: tcp_ipv6.c,v 1.144 2002/02/01 22:01:04 davem Exp $
- *
* Based on:
* linux/net/ipv4/tcp.c
* linux/net/ipv4/tcp_input.c
@@ -72,8 +70,6 @@
static void tcp_v6_send_reset(struct sock *sk, struct sk_buff *skb);
static void tcp_v6_reqsk_send_ack(struct sk_buff *skb, struct request_sock *req);
-static void tcp_v6_send_check(struct sock *sk, int len,
- struct sk_buff *skb);
static int tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb);
@@ -82,6 +78,12 @@ static struct inet_connection_sock_af_ops ipv6_specific;
#ifdef CONFIG_TCP_MD5SIG
static struct tcp_sock_af_ops tcp_sock_ipv6_specific;
static struct tcp_sock_af_ops tcp_sock_ipv6_mapped_specific;
+#else
+static struct tcp_md5sig_key *tcp_v6_md5_do_lookup(struct sock *sk,
+ struct in6_addr *addr)
+{
+ return NULL;
+}
#endif
static void tcp_v6_hash(struct sock *sk)
@@ -321,8 +323,9 @@ static void tcp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
int err;
struct tcp_sock *tp;
__u32 seq;
+ struct net *net = dev_net(skb->dev);
- sk = inet6_lookup(dev_net(skb->dev), &tcp_hashinfo, &hdr->daddr,
+ sk = inet6_lookup(net, &tcp_hashinfo, &hdr->daddr,
th->dest, &hdr->saddr, th->source, skb->dev->ifindex);
if (sk == NULL) {
@@ -337,7 +340,7 @@ static void tcp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
bh_lock_sock(sk);
if (sock_owned_by_user(sk))
- NET_INC_STATS_BH(LINUX_MIB_LOCKDROPPEDICMPS);
+ NET_INC_STATS_BH(net, LINUX_MIB_LOCKDROPPEDICMPS);
if (sk->sk_state == TCP_CLOSE)
goto out;
@@ -346,7 +349,7 @@ static void tcp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
seq = ntohl(th->seq);
if (sk->sk_state != TCP_LISTEN &&
!between(seq, tp->snd_una, tp->snd_nxt)) {
- NET_INC_STATS_BH(LINUX_MIB_OUTOFWINDOWICMPS);
+ NET_INC_STATS_BH(net, LINUX_MIB_OUTOFWINDOWICMPS);
goto out;
}
@@ -421,7 +424,7 @@ static void tcp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
BUG_TRAP(req->sk == NULL);
if (seq != tcp_rsk(req)->snt_isn) {
- NET_INC_STATS_BH(LINUX_MIB_OUTOFWINDOWICMPS);
+ NET_INC_STATS_BH(net, LINUX_MIB_OUTOFWINDOWICMPS);
goto out;
}
@@ -733,109 +736,105 @@ static int tcp_v6_parse_md5_keys (struct sock *sk, char __user *optval,
return tcp_v6_md5_do_add(sk, &sin6->sin6_addr, newkey, cmd.tcpm_keylen);
}
-static int tcp_v6_do_calc_md5_hash(char *md5_hash, struct tcp_md5sig_key *key,
- struct in6_addr *saddr,
- struct in6_addr *daddr,
- struct tcphdr *th, int protocol,
- unsigned int tcplen)
+static int tcp_v6_md5_hash_pseudoheader(struct tcp_md5sig_pool *hp,
+ struct in6_addr *daddr,
+ struct in6_addr *saddr, int nbytes)
{
- struct scatterlist sg[4];
- __u16 data_len;
- int block = 0;
- __sum16 cksum;
- struct tcp_md5sig_pool *hp;
struct tcp6_pseudohdr *bp;
- struct hash_desc *desc;
- int err;
- unsigned int nbytes = 0;
+ struct scatterlist sg;
- hp = tcp_get_md5sig_pool();
- if (!hp) {
- printk(KERN_WARNING "%s(): hash pool not found...\n", __func__);
- goto clear_hash_noput;
- }
bp = &hp->md5_blk.ip6;
- desc = &hp->md5_desc;
-
/* 1. TCP pseudo-header (RFC2460) */
ipv6_addr_copy(&bp->saddr, saddr);
ipv6_addr_copy(&bp->daddr, daddr);
- bp->len = htonl(tcplen);
- bp->protocol = htonl(protocol);
-
- sg_init_table(sg, 4);
-
- sg_set_buf(&sg[block++], bp, sizeof(*bp));
- nbytes += sizeof(*bp);
+ bp->protocol = cpu_to_be32(IPPROTO_TCP);
+ bp->len = cpu_to_be32(nbytes);
- /* 2. TCP header, excluding options */
- cksum = th->check;
- th->check = 0;
- sg_set_buf(&sg[block++], th, sizeof(*th));
- nbytes += sizeof(*th);
-
- /* 3. TCP segment data (if any) */
- data_len = tcplen - (th->doff << 2);
- if (data_len > 0) {
- u8 *data = (u8 *)th + (th->doff << 2);
- sg_set_buf(&sg[block++], data, data_len);
- nbytes += data_len;
- }
+ sg_init_one(&sg, bp, sizeof(*bp));
+ return crypto_hash_update(&hp->md5_desc, &sg, sizeof(*bp));
+}
- /* 4. shared key */
- sg_set_buf(&sg[block++], key->key, key->keylen);
- nbytes += key->keylen;
+static int tcp_v6_md5_hash_hdr(char *md5_hash, struct tcp_md5sig_key *key,
+ struct in6_addr *daddr, struct in6_addr *saddr,
+ struct tcphdr *th)
+{
+ struct tcp_md5sig_pool *hp;
+ struct hash_desc *desc;
- sg_mark_end(&sg[block - 1]);
+ hp = tcp_get_md5sig_pool();
+ if (!hp)
+ goto clear_hash_noput;
+ desc = &hp->md5_desc;
- /* Now store the hash into the packet */
- err = crypto_hash_init(desc);
- if (err) {
- printk(KERN_WARNING "%s(): hash_init failed\n", __func__);
+ if (crypto_hash_init(desc))
goto clear_hash;
- }
- err = crypto_hash_update(desc, sg, nbytes);
- if (err) {
- printk(KERN_WARNING "%s(): hash_update failed\n", __func__);
+ if (tcp_v6_md5_hash_pseudoheader(hp, daddr, saddr, th->doff << 2))
goto clear_hash;
- }
- err = crypto_hash_final(desc, md5_hash);
- if (err) {
- printk(KERN_WARNING "%s(): hash_final failed\n", __func__);
+ if (tcp_md5_hash_header(hp, th))
+ goto clear_hash;
+ if (tcp_md5_hash_key(hp, key))
+ goto clear_hash;
+ if (crypto_hash_final(desc, md5_hash))
goto clear_hash;
- }
- /* Reset header, and free up the crypto */
tcp_put_md5sig_pool();
- th->check = cksum;
-out:
return 0;
+
clear_hash:
tcp_put_md5sig_pool();
clear_hash_noput:
memset(md5_hash, 0, 16);
- goto out;
+ return 1;
}
-static int tcp_v6_calc_md5_hash(char *md5_hash, struct tcp_md5sig_key *key,
- struct sock *sk,
- struct dst_entry *dst,
- struct request_sock *req,
- struct tcphdr *th, int protocol,
- unsigned int tcplen)
+static int tcp_v6_md5_hash_skb(char *md5_hash, struct tcp_md5sig_key *key,
+ struct sock *sk, struct request_sock *req,
+ struct sk_buff *skb)
{
struct in6_addr *saddr, *daddr;
+ struct tcp_md5sig_pool *hp;
+ struct hash_desc *desc;
+ struct tcphdr *th = tcp_hdr(skb);
if (sk) {
saddr = &inet6_sk(sk)->saddr;
daddr = &inet6_sk(sk)->daddr;
- } else {
+ } else if (req) {
saddr = &inet6_rsk(req)->loc_addr;
daddr = &inet6_rsk(req)->rmt_addr;
+ } else {
+ struct ipv6hdr *ip6h = ipv6_hdr(skb);
+ saddr = &ip6h->saddr;
+ daddr = &ip6h->daddr;
}
- return tcp_v6_do_calc_md5_hash(md5_hash, key,
- saddr, daddr,
- th, protocol, tcplen);
+
+ hp = tcp_get_md5sig_pool();
+ if (!hp)
+ goto clear_hash_noput;
+ desc = &hp->md5_desc;
+
+ if (crypto_hash_init(desc))
+ goto clear_hash;
+
+ if (tcp_v6_md5_hash_pseudoheader(hp, daddr, saddr, skb->len))
+ goto clear_hash;
+ if (tcp_md5_hash_header(hp, th))
+ goto clear_hash;
+ if (tcp_md5_hash_skb_data(hp, skb, th->doff << 2))
+ goto clear_hash;
+ if (tcp_md5_hash_key(hp, key))
+ goto clear_hash;
+ if (crypto_hash_final(desc, md5_hash))
+ goto clear_hash;
+
+ tcp_put_md5sig_pool();
+ return 0;
+
+clear_hash:
+ tcp_put_md5sig_pool();
+clear_hash_noput:
+ memset(md5_hash, 0, 16);
+ return 1;
}
static int tcp_v6_inbound_md5_hash (struct sock *sk, struct sk_buff *skb)
@@ -844,43 +843,12 @@ static int tcp_v6_inbound_md5_hash (struct sock *sk, struct sk_buff *skb)
struct tcp_md5sig_key *hash_expected;
struct ipv6hdr *ip6h = ipv6_hdr(skb);
struct tcphdr *th = tcp_hdr(skb);
- int length = (th->doff << 2) - sizeof (*th);
int genhash;
- u8 *ptr;
u8 newhash[16];
hash_expected = tcp_v6_md5_do_lookup(sk, &ip6h->saddr);
+ hash_location = tcp_parse_md5sig_option(th);
- /* If the TCP option is too short, we can short cut */
- if (length < TCPOLEN_MD5SIG)
- return hash_expected ? 1 : 0;
-
- /* parse options */
- ptr = (u8*)(th + 1);
- while (length > 0) {
- int opcode = *ptr++;
- int opsize;
-
- switch(opcode) {
- case TCPOPT_EOL:
- goto done_opts;
- case TCPOPT_NOP:
- length--;
- continue;
- default:
- opsize = *ptr++;
- if (opsize < 2 || opsize > length)
- goto done_opts;
- if (opcode == TCPOPT_MD5SIG) {
- hash_location = ptr;
- goto done_opts;
- }
- }
- ptr += opsize - 2;
- length -= opsize;
- }
-
-done_opts:
/* do we have a hash as expected? */
if (!hash_expected) {
if (!hash_location)
@@ -907,11 +875,10 @@ done_opts:
}
/* check the signature */
- genhash = tcp_v6_do_calc_md5_hash(newhash,
- hash_expected,
- &ip6h->saddr, &ip6h->daddr,
- th, sk->sk_protocol,
- skb->len);
+ genhash = tcp_v6_md5_hash_skb(newhash,
+ hash_expected,
+ NULL, NULL, skb);
+
if (genhash || memcmp(hash_location, newhash, 16) != 0) {
if (net_ratelimit()) {
printk(KERN_INFO "MD5 Hash %s for "
@@ -1048,10 +1015,9 @@ static void tcp_v6_send_reset(struct sock *sk, struct sk_buff *skb)
(TCPOPT_NOP << 16) |
(TCPOPT_MD5SIG << 8) |
TCPOLEN_MD5SIG);
- tcp_v6_do_calc_md5_hash((__u8 *)&opt[1], key,
- &ipv6_hdr(skb)->daddr,
- &ipv6_hdr(skb)->saddr,
- t1, IPPROTO_TCP, tot_len);
+ tcp_v6_md5_hash_hdr((__u8 *)&opt[1], key,
+ &ipv6_hdr(skb)->daddr,
+ &ipv6_hdr(skb)->saddr, t1);
}
#endif
@@ -1079,8 +1045,8 @@ static void tcp_v6_send_reset(struct sock *sk, struct sk_buff *skb)
if (xfrm_lookup(&buff->dst, &fl, NULL, 0) >= 0) {
ip6_xmit(ctl_sk, buff, &fl, NULL, 0);
- TCP_INC_STATS_BH(TCP_MIB_OUTSEGS);
- TCP_INC_STATS_BH(TCP_MIB_OUTRSTS);
+ TCP_INC_STATS_BH(net, TCP_MIB_OUTSEGS);
+ TCP_INC_STATS_BH(net, TCP_MIB_OUTRSTS);
return;
}
}
@@ -1088,8 +1054,8 @@ static void tcp_v6_send_reset(struct sock *sk, struct sk_buff *skb)
kfree_skb(buff);
}
-static void tcp_v6_send_ack(struct tcp_timewait_sock *tw,
- struct sk_buff *skb, u32 seq, u32 ack, u32 win, u32 ts)
+static void tcp_v6_send_ack(struct sk_buff *skb, u32 seq, u32 ack, u32 win, u32 ts,
+ struct tcp_md5sig_key *key)
{
struct tcphdr *th = tcp_hdr(skb), *t1;
struct sk_buff *buff;
@@ -1098,22 +1064,6 @@ static void tcp_v6_send_ack(struct tcp_timewait_sock *tw,
struct sock *ctl_sk = net->ipv6.tcp_sk;
unsigned int tot_len = sizeof(struct tcphdr);
__be32 *topt;
-#ifdef CONFIG_TCP_MD5SIG
- struct tcp_md5sig_key *key;
- struct tcp_md5sig_key tw_key;
-#endif
-
-#ifdef CONFIG_TCP_MD5SIG
- if (!tw && skb->sk) {
- key = tcp_v6_md5_do_lookup(skb->sk, &ipv6_hdr(skb)->daddr);
- } else if (tw && tw->tw_md5_keylen) {
- tw_key.key = tw->tw_md5_key;
- tw_key.keylen = tw->tw_md5_keylen;
- key = &tw_key;
- } else {
- key = NULL;
- }
-#endif
if (ts)
tot_len += TCPOLEN_TSTAMP_ALIGNED;
@@ -1154,10 +1104,9 @@ static void tcp_v6_send_ack(struct tcp_timewait_sock *tw,
if (key) {
*topt++ = htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16) |
(TCPOPT_MD5SIG << 8) | TCPOLEN_MD5SIG);
- tcp_v6_do_calc_md5_hash((__u8 *)topt, key,
- &ipv6_hdr(skb)->daddr,
- &ipv6_hdr(skb)->saddr,
- t1, IPPROTO_TCP, tot_len);
+ tcp_v6_md5_hash_hdr((__u8 *)topt, key,
+ &ipv6_hdr(skb)->daddr,
+ &ipv6_hdr(skb)->saddr, t1);
}
#endif
@@ -1180,7 +1129,7 @@ static void tcp_v6_send_ack(struct tcp_timewait_sock *tw,
if (!ip6_dst_lookup(ctl_sk, &buff->dst, &fl)) {
if (xfrm_lookup(&buff->dst, &fl, NULL, 0) >= 0) {
ip6_xmit(ctl_sk, buff, &fl, NULL, 0);
- TCP_INC_STATS_BH(TCP_MIB_OUTSEGS);
+ TCP_INC_STATS_BH(net, TCP_MIB_OUTSEGS);
return;
}
}
@@ -1193,16 +1142,17 @@ static void tcp_v6_timewait_ack(struct sock *sk, struct sk_buff *skb)
struct inet_timewait_sock *tw = inet_twsk(sk);
struct tcp_timewait_sock *tcptw = tcp_twsk(sk);
- tcp_v6_send_ack(tcptw, skb, tcptw->tw_snd_nxt, tcptw->tw_rcv_nxt,
+ tcp_v6_send_ack(skb, tcptw->tw_snd_nxt, tcptw->tw_rcv_nxt,
tcptw->tw_rcv_wnd >> tw->tw_rcv_wscale,
- tcptw->tw_ts_recent);
+ tcptw->tw_ts_recent, tcp_twsk_md5_key(tcptw));
inet_twsk_put(tw);
}
static void tcp_v6_reqsk_send_ack(struct sk_buff *skb, struct request_sock *req)
{
- tcp_v6_send_ack(NULL, skb, tcp_rsk(req)->snt_isn + 1, tcp_rsk(req)->rcv_isn + 1, req->rcv_wnd, req->ts_recent);
+ tcp_v6_send_ack(skb, tcp_rsk(req)->snt_isn + 1, tcp_rsk(req)->rcv_isn + 1, req->rcv_wnd, req->ts_recent,
+ skb->sk ? tcp_v6_md5_do_lookup(skb->sk, &ipv6_hdr(skb)->daddr) : NULL);
}
@@ -1538,9 +1488,9 @@ static struct sock * tcp_v6_syn_recv_sock(struct sock *sk, struct sk_buff *skb,
return newsk;
out_overflow:
- NET_INC_STATS_BH(LINUX_MIB_LISTENOVERFLOWS);
+ NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_LISTENOVERFLOWS);
out:
- NET_INC_STATS_BH(LINUX_MIB_LISTENDROPS);
+ NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_LISTENDROPS);
if (opt && opt != np->opt)
sock_kfree_s(sk, opt, opt->tot_len);
dst_release(dst);
@@ -1669,7 +1619,7 @@ discard:
kfree_skb(skb);
return 0;
csum_err:
- TCP_INC_STATS_BH(TCP_MIB_INERRS);
+ TCP_INC_STATS_BH(sock_net(sk), TCP_MIB_INERRS);
goto discard;
@@ -1707,6 +1657,7 @@ static int tcp_v6_rcv(struct sk_buff *skb)
struct tcphdr *th;
struct sock *sk;
int ret;
+ struct net *net = dev_net(skb->dev);
if (skb->pkt_type != PACKET_HOST)
goto discard_it;
@@ -1714,7 +1665,7 @@ static int tcp_v6_rcv(struct sk_buff *skb)
/*
* Count it even if it's bad.
*/
- TCP_INC_STATS_BH(TCP_MIB_INSEGS);
+ TCP_INC_STATS_BH(net, TCP_MIB_INSEGS);
if (!pskb_may_pull(skb, sizeof(struct tcphdr)))
goto discard_it;
@@ -1738,7 +1689,7 @@ static int tcp_v6_rcv(struct sk_buff *skb)
TCP_SKB_CB(skb)->flags = ipv6_get_dsfield(ipv6_hdr(skb));
TCP_SKB_CB(skb)->sacked = 0;
- sk = __inet6_lookup(dev_net(skb->dev), &tcp_hashinfo,
+ sk = __inet6_lookup(net, &tcp_hashinfo,
&ipv6_hdr(skb)->saddr, th->source,
&ipv6_hdr(skb)->daddr, ntohs(th->dest),
inet6_iif(skb));
@@ -1786,7 +1737,7 @@ no_tcp_socket:
if (skb->len < (th->doff<<2) || tcp_checksum_complete(skb)) {
bad_packet:
- TCP_INC_STATS_BH(TCP_MIB_INERRS);
+ TCP_INC_STATS_BH(net, TCP_MIB_INERRS);
} else {
tcp_v6_send_reset(NULL, skb);
}
@@ -1811,7 +1762,7 @@ do_time_wait:
}
if (skb->len < (th->doff<<2) || tcp_checksum_complete(skb)) {
- TCP_INC_STATS_BH(TCP_MIB_INERRS);
+ TCP_INC_STATS_BH(net, TCP_MIB_INERRS);
inet_twsk_put(inet_twsk(sk));
goto discard_it;
}
@@ -1871,7 +1822,7 @@ static struct inet_connection_sock_af_ops ipv6_specific = {
#ifdef CONFIG_TCP_MD5SIG
static struct tcp_sock_af_ops tcp_sock_ipv6_specific = {
.md5_lookup = tcp_v6_md5_lookup,
- .calc_md5_hash = tcp_v6_calc_md5_hash,
+ .calc_md5_hash = tcp_v6_md5_hash_skb,
.md5_add = tcp_v6_md5_add_func,
.md5_parse = tcp_v6_parse_md5_keys,
};
@@ -1903,7 +1854,7 @@ static struct inet_connection_sock_af_ops ipv6_mapped = {
#ifdef CONFIG_TCP_MD5SIG
static struct tcp_sock_af_ops tcp_sock_ipv6_mapped_specific = {
.md5_lookup = tcp_v4_md5_lookup,
- .calc_md5_hash = tcp_v4_calc_md5_hash,
+ .calc_md5_hash = tcp_v4_md5_hash_skb,
.md5_add = tcp_v6_md5_add_func,
.md5_parse = tcp_v6_parse_md5_keys,
};
@@ -1960,7 +1911,7 @@ static int tcp_v6_init_sock(struct sock *sk)
return 0;
}
-static int tcp_v6_destroy_sock(struct sock *sk)
+static void tcp_v6_destroy_sock(struct sock *sk)
{
#ifdef CONFIG_TCP_MD5SIG
/* Clean up the MD5 key list */
@@ -1968,7 +1919,7 @@ static int tcp_v6_destroy_sock(struct sock *sk)
tcp_v6_clear_md5_list(sk);
#endif
tcp_v4_destroy_sock(sk);
- return inet6_destroy_sock(sk);
+ inet6_destroy_sock(sk);
}
#ifdef CONFIG_PROC_FS
diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c
index dd309626ae9a..d1477b350f76 100644
--- a/net/ipv6/udp.c
+++ b/net/ipv6/udp.c
@@ -7,8 +7,6 @@
*
* Based on linux/ipv4/udp.c
*
- * $Id: udp.c,v 1.65 2002/02/01 22:01:04 davem Exp $
- *
* Fixes:
* Hideaki YOSHIFUJI : sin6_scope_id support
* YOSHIFUJI Hideaki @USAGI and: Support IPV6_V6ONLY socket option, which
@@ -67,7 +65,7 @@ static struct sock *__udp6_lib_lookup(struct net *net,
int badness = -1;
read_lock(&udp_hash_lock);
- sk_for_each(sk, node, &udptable[hnum & (UDP_HTABLE_SIZE - 1)]) {
+ sk_for_each(sk, node, &udptable[udp_hashfn(net, hnum)]) {
struct inet_sock *inet = inet_sk(sk);
if (net_eq(sock_net(sk), net) && sk->sk_hash == hnum &&
@@ -168,7 +166,8 @@ try_again:
goto out_free;
if (!peeked)
- UDP6_INC_STATS_USER(UDP_MIB_INDATAGRAMS, is_udplite);
+ UDP6_INC_STATS_USER(sock_net(sk),
+ UDP_MIB_INDATAGRAMS, is_udplite);
sock_recv_timestamp(msg, sk, skb);
@@ -215,7 +214,7 @@ out:
csum_copy_err:
lock_sock(sk);
if (!skb_kill_datagram(sk, skb, flags))
- UDP6_INC_STATS_USER(UDP_MIB_INERRORS, is_udplite);
+ UDP6_INC_STATS_USER(sock_net(sk), UDP_MIB_INERRORS, is_udplite);