aboutsummaryrefslogtreecommitdiffstats
path: root/net
diff options
context:
space:
mode:
Diffstat (limited to 'net')
-rw-r--r--net/8021q/vlan.c2
-rw-r--r--net/Kconfig3
-rw-r--r--net/appletalk/ddp.c2
-rw-r--r--net/atm/common.c4
-rw-r--r--net/atm/common.h2
-rw-r--r--net/atm/pvc.c2
-rw-r--r--net/atm/svc.c2
-rw-r--r--net/ax25/af_ax25.c4
-rw-r--r--net/bluetooth/bnep/sock.c2
-rw-r--r--net/bluetooth/cmtp/sock.c2
-rw-r--r--net/bluetooth/hci_core.c9
-rw-r--r--net/bluetooth/hci_sock.c2
-rw-r--r--net/bluetooth/hidp/sock.c2
-rw-r--r--net/bluetooth/l2cap_sock.c10
-rw-r--r--net/bluetooth/rfcomm/core.c2
-rw-r--r--net/bluetooth/rfcomm/sock.c8
-rw-r--r--net/bluetooth/sco.c8
-rw-r--r--net/bridge/br.c22
-rw-r--r--net/bridge/br_mdb.c2
-rw-r--r--net/bridge/br_multicast.c240
-rw-r--r--net/bridge/br_netfilter.c67
-rw-r--r--net/bridge/br_netlink.c28
-rw-r--r--net/bridge/br_private.h6
-rw-r--r--net/bridge/br_stp.c6
-rw-r--r--net/bridge/br_stp_timer.c2
-rw-r--r--net/bridge/netfilter/ebt_stp.c6
-rw-r--r--net/bridge/netfilter/ebtables.c6
-rw-r--r--net/caif/caif_socket.c2
-rw-r--r--net/can/af_can.c2
-rw-r--r--net/ceph/messenger.c4
-rw-r--r--net/core/dev.c214
-rw-r--r--net/core/ethtool.c11
-rw-r--r--net/core/filter.c85
-rw-r--r--net/core/flow_dissector.c428
-rw-r--r--net/core/neighbour.c3
-rw-r--r--net/core/net-sysfs.c10
-rw-r--r--net/core/net_namespace.c133
-rw-r--r--net/core/pktgen.c100
-rw-r--r--net/core/rtnetlink.c27
-rw-r--r--net/core/skbuff.c323
-rw-r--r--net/core/sock.c39
-rw-r--r--net/core/stream.c6
-rw-r--r--net/decnet/af_decnet.c8
-rw-r--r--net/dsa/dsa.c2
-rw-r--r--net/dsa/slave.c54
-rw-r--r--net/ethernet/eth.c14
-rw-r--r--net/ieee802154/Makefile4
-rw-r--r--net/ieee802154/nl-phy.c5
-rw-r--r--net/ieee802154/nl802154.c2
-rw-r--r--net/ieee802154/rdev-ops.h85
-rw-r--r--net/ieee802154/socket.c2
-rw-r--r--net/ieee802154/trace.c7
-rw-r--r--net/ieee802154/trace.h247
-rw-r--r--net/ipv4/Kconfig4
-rw-r--r--net/ipv4/Makefile2
-rw-r--r--net/ipv4/af_inet.c6
-rw-r--r--net/ipv4/fib_semantics.c4
-rw-r--r--net/ipv4/fib_trie.c43
-rw-r--r--net/ipv4/geneve_core.c (renamed from net/ipv4/geneve.c)10
-rw-r--r--net/ipv4/igmp.c162
-rw-r--r--net/ipv4/inet_connection_sock.c19
-rw-r--r--net/ipv4/inet_diag.c8
-rw-r--r--net/ipv4/inet_hashtables.c7
-rw-r--r--net/ipv4/inet_timewait_sock.c2
-rw-r--r--net/ipv4/ip_fragment.c15
-rw-r--r--net/ipv4/ip_output.c42
-rw-r--r--net/ipv4/ip_tunnel_core.c20
-rw-r--r--net/ipv4/ipip.c3
-rw-r--r--net/ipv4/netfilter/arp_tables.c6
-rw-r--r--net/ipv4/netfilter/ip_tables.c6
-rw-r--r--net/ipv4/netfilter/ipt_CLUSTERIP.c5
-rw-r--r--net/ipv4/netfilter/ipt_SYNPROXY.c4
-rw-r--r--net/ipv4/ping.c1
-rw-r--r--net/ipv4/proc.c2
-rw-r--r--net/ipv4/route.c32
-rw-r--r--net/ipv4/sysctl_net_ipv4.c7
-rw-r--r--net/ipv4/tcp.c115
-rw-r--r--net/ipv4/tcp_dctcp.c20
-rw-r--r--net/ipv4/tcp_fastopen.c5
-rw-r--r--net/ipv4/tcp_illinois.c21
-rw-r--r--net/ipv4/tcp_input.c179
-rw-r--r--net/ipv4/tcp_ipv4.c7
-rw-r--r--net/ipv4/tcp_minisocks.c6
-rw-r--r--net/ipv4/tcp_output.c49
-rw-r--r--net/ipv4/tcp_timer.c4
-rw-r--r--net/ipv4/tcp_vegas.c19
-rw-r--r--net/ipv4/tcp_vegas.h3
-rw-r--r--net/ipv4/tcp_westwood.c15
-rw-r--r--net/ipv4/udp_tunnel.c8
-rw-r--r--net/ipv6/Makefile1
-rw-r--r--net/ipv6/addrconf.c2
-rw-r--r--net/ipv6/af_inet6.c3
-rw-r--r--net/ipv6/ip6_fib.c39
-rw-r--r--net/ipv6/ip6_flowlabel.c4
-rw-r--r--net/ipv6/ip6_output.c43
-rw-r--r--net/ipv6/ip6_udp_tunnel.c6
-rw-r--r--net/ipv6/mcast_snoop.c213
-rw-r--r--net/ipv6/netfilter/ip6_tables.c6
-rw-r--r--net/ipv6/route.c206
-rw-r--r--net/ipv6/sysctl_net_ipv6.c8
-rw-r--r--net/ipv6/tcp_ipv6.c3
-rw-r--r--net/ipv6/udp.c4
-rw-r--r--net/ipv6/xfrm6_policy.c14
-rw-r--r--net/ipx/af_ipx.c2
-rw-r--r--net/irda/af_irda.c2
-rw-r--r--net/iucv/af_iucv.c10
-rw-r--r--net/key/af_key.c2
-rw-r--r--net/l2tp/l2tp_core.c15
-rw-r--r--net/l2tp/l2tp_ppp.c4
-rw-r--r--net/llc/af_llc.c2
-rw-r--r--net/llc/llc_conn.c6
-rw-r--r--net/mac80211/Kconfig16
-rw-r--r--net/mac80211/cfg.c117
-rw-r--r--net/mac80211/chan.c10
-rw-r--r--net/mac80211/debugfs.c74
-rw-r--r--net/mac80211/debugfs_sta.c85
-rw-r--r--net/mac80211/driver-ops.h11
-rw-r--r--net/mac80211/ethtool.c3
-rw-r--r--net/mac80211/ibss.c5
-rw-r--r--net/mac80211/ieee80211_i.h36
-rw-r--r--net/mac80211/iface.c76
-rw-r--r--net/mac80211/key.c12
-rw-r--r--net/mac80211/key.h3
-rw-r--r--net/mac80211/led.c268
-rw-r--r--net/mac80211/led.h44
-rw-r--r--net/mac80211/main.c15
-rw-r--r--net/mac80211/mesh_plink.c37
-rw-r--r--net/mac80211/mlme.c57
-rw-r--r--net/mac80211/rate.c8
-rw-r--r--net/mac80211/rate.h14
-rw-r--r--net/mac80211/rx.c201
-rw-r--r--net/mac80211/sta_info.c29
-rw-r--r--net/mac80211/sta_info.h41
-rw-r--r--net/mac80211/status.c28
-rw-r--r--net/mac80211/tdls.c27
-rw-r--r--net/mac80211/trace.h42
-rw-r--r--net/mac80211/tx.c528
-rw-r--r--net/mac80211/wep.c6
-rw-r--r--net/mac802154/cfg.c9
-rw-r--r--net/mac802154/ieee802154_i.h3
-rw-r--r--net/mac802154/iface.c5
-rw-r--r--net/mac802154/llsec.c4
-rw-r--r--net/mac802154/main.c7
-rw-r--r--net/mpls/af_mpls.c18
-rw-r--r--net/mpls/internal.h10
-rw-r--r--net/netfilter/Kconfig9
-rw-r--r--net/netfilter/core.c37
-rw-r--r--net/netfilter/ipset/ip_set_bitmap_ip.c17
-rw-r--r--net/netfilter/ipset/ip_set_bitmap_ipmac.c13
-rw-r--r--net/netfilter/ipset/ip_set_bitmap_port.c3
-rw-r--r--net/netfilter/ipset/ip_set_core.c49
-rw-r--r--net/netfilter/ipset/ip_set_getport.c6
-rw-r--r--net/netfilter/ipset/ip_set_hash_gen.h22
-rw-r--r--net/netfilter/ipset/ip_set_hash_ip.c33
-rw-r--r--net/netfilter/ipset/ip_set_hash_ipmark.c43
-rw-r--r--net/netfilter/ipset/ip_set_hash_ipport.c49
-rw-r--r--net/netfilter/ipset/ip_set_hash_ipportip.c40
-rw-r--r--net/netfilter/ipset/ip_set_hash_ipportnet.c40
-rw-r--r--net/netfilter/ipset/ip_set_hash_mac.c11
-rw-r--r--net/netfilter/ipset/ip_set_hash_net.c28
-rw-r--r--net/netfilter/ipset/ip_set_hash_netiface.c29
-rw-r--r--net/netfilter/ipset/ip_set_hash_netnet.c30
-rw-r--r--net/netfilter/ipset/ip_set_hash_netport.c38
-rw-r--r--net/netfilter/ipset/ip_set_hash_netportnet.c52
-rw-r--r--net/netfilter/ipset/ip_set_list_set.c3
-rw-r--r--net/netfilter/ipvs/ip_vs_ctl.c3
-rw-r--r--net/netfilter/ipvs/ip_vs_sync.c30
-rw-r--r--net/netfilter/nf_conntrack_proto_tcp.c35
-rw-r--r--net/netfilter/nf_tables_api.c7
-rw-r--r--net/netfilter/nfnetlink_log.c19
-rw-r--r--net/netfilter/nfnetlink_queue_core.c20
-rw-r--r--net/netfilter/nft_compat.c2
-rw-r--r--net/netfilter/x_tables.c18
-rw-r--r--net/netfilter/xt_TCPMSS.c6
-rw-r--r--net/netfilter/xt_mark.c1
-rw-r--r--net/netfilter/xt_set.c3
-rw-r--r--net/netlink/af_netlink.c152
-rw-r--r--net/netrom/af_netrom.c4
-rw-r--r--net/nfc/af_nfc.c2
-rw-r--r--net/nfc/llcp.h2
-rw-r--r--net/nfc/llcp_core.c2
-rw-r--r--net/nfc/llcp_sock.c8
-rw-r--r--net/nfc/nfc.h2
-rw-r--r--net/nfc/rawsock.c4
-rw-r--r--net/openvswitch/Kconfig2
-rw-r--r--net/openvswitch/datapath.c2
-rw-r--r--net/openvswitch/flow.c4
-rw-r--r--net/openvswitch/flow_netlink.c2
-rw-r--r--net/openvswitch/vport-geneve.c5
-rw-r--r--net/packet/af_packet.c182
-rw-r--r--net/packet/internal.h12
-rw-r--r--net/phonet/af_phonet.c2
-rw-r--r--net/phonet/pep.c2
-rw-r--r--net/rds/af_rds.c2
-rw-r--r--net/rds/connection.c17
-rw-r--r--net/rds/ib_cm.c13
-rw-r--r--net/rds/tcp_connect.c1
-rw-r--r--net/rds/tcp_listen.c46
-rw-r--r--net/rose/af_rose.c4
-rw-r--r--net/rxrpc/af_rxrpc.c2
-rw-r--r--net/rxrpc/ar-local.c4
-rw-r--r--net/sched/Kconfig11
-rw-r--r--net/sched/Makefile1
-rw-r--r--net/sched/act_api.c5
-rw-r--r--net/sched/act_connmark.c2
-rw-r--r--net/sched/act_mirred.c2
-rw-r--r--net/sched/act_pedit.c5
-rw-r--r--net/sched/cls_api.c12
-rw-r--r--net/sched/cls_flow.c22
-rw-r--r--net/sched/cls_flower.c688
-rw-r--r--net/sched/sch_api.c12
-rw-r--r--net/sched/sch_choke.c20
-rw-r--r--net/sched/sch_codel.c17
-rw-r--r--net/sched/sch_fq_codel.c28
-rw-r--r--net/sched/sch_gred.c32
-rw-r--r--net/sched/sch_hhf.c19
-rw-r--r--net/sched/sch_ingress.c59
-rw-r--r--net/sched/sch_netem.c4
-rw-r--r--net/sched/sch_sfb.c24
-rw-r--r--net/sched/sch_sfq.c27
-rw-r--r--net/sctp/ipv6.c2
-rw-r--r--net/sctp/protocol.c2
-rw-r--r--net/socket.c7
-rw-r--r--net/sunrpc/auth_gss/gss_rpc_xdr.c23
-rw-r--r--net/switchdev/switchdev.c829
-rw-r--r--net/tipc/addr.c7
-rw-r--r--net/tipc/addr.h8
-rw-r--r--net/tipc/bcast.c41
-rw-r--r--net/tipc/bcast.h1
-rw-r--r--net/tipc/bearer.c37
-rw-r--r--net/tipc/bearer.h2
-rw-r--r--net/tipc/core.c4
-rw-r--r--net/tipc/core.h37
-rw-r--r--net/tipc/link.c322
-rw-r--r--net/tipc/link.h59
-rw-r--r--net/tipc/msg.c51
-rw-r--r--net/tipc/msg.h37
-rw-r--r--net/tipc/name_table.c34
-rw-r--r--net/tipc/net.c1
-rw-r--r--net/tipc/netlink_compat.c137
-rw-r--r--net/tipc/node.c3
-rw-r--r--net/tipc/node.h2
-rw-r--r--net/tipc/server.c6
-rw-r--r--net/tipc/socket.c3
-rw-r--r--net/tipc/subscr.c242
-rw-r--r--net/tipc/subscr.h18
-rw-r--r--net/unix/af_unix.c8
-rw-r--r--net/vmw_vsock/af_vsock.c7
-rw-r--r--net/vmw_vsock/vmci_transport.c2
-rw-r--r--net/wireless/chan.c65
-rw-r--r--net/wireless/nl80211.c7
-rw-r--r--net/wireless/reg.c4
-rw-r--r--net/x25/af_x25.c8
-rw-r--r--net/xfrm/xfrm_output.c12
254 files changed, 6668 insertions, 3097 deletions
diff --git a/net/8021q/vlan.c b/net/8021q/vlan.c
index 98a30a5b8664..59555f0f8fc8 100644
--- a/net/8021q/vlan.c
+++ b/net/8021q/vlan.c
@@ -443,7 +443,7 @@ static int vlan_device_event(struct notifier_block *unused, unsigned long event,
case NETDEV_UP:
/* Put all VLANs for this dev in the up state too. */
vlan_group_for_each_dev(grp, i, vlandev) {
- flgs = vlandev->flags;
+ flgs = dev_get_flags(vlandev);
if (flgs & IFF_UP)
continue;
diff --git a/net/Kconfig b/net/Kconfig
index 44dd5786ee91..57a7c5af3175 100644
--- a/net/Kconfig
+++ b/net/Kconfig
@@ -45,6 +45,9 @@ config COMPAT_NETLINK_MESSAGES
Newly written code should NEVER need this option but do
compat-independent messages instead!
+config NET_INGRESS
+ bool
+
menu "Networking options"
source "net/packet/Kconfig"
diff --git a/net/appletalk/ddp.c b/net/appletalk/ddp.c
index 3b7ad43c7dad..d5871ac493eb 100644
--- a/net/appletalk/ddp.c
+++ b/net/appletalk/ddp.c
@@ -1030,7 +1030,7 @@ static int atalk_create(struct net *net, struct socket *sock, int protocol,
if (sock->type != SOCK_RAW && sock->type != SOCK_DGRAM)
goto out;
rc = -ENOMEM;
- sk = sk_alloc(net, PF_APPLETALK, GFP_KERNEL, &ddp_proto);
+ sk = sk_alloc(net, PF_APPLETALK, GFP_KERNEL, &ddp_proto, kern);
if (!sk)
goto out;
rc = 0;
diff --git a/net/atm/common.c b/net/atm/common.c
index ed0466637e13..49a872db7e42 100644
--- a/net/atm/common.c
+++ b/net/atm/common.c
@@ -141,7 +141,7 @@ static struct proto vcc_proto = {
.release_cb = vcc_release_cb,
};
-int vcc_create(struct net *net, struct socket *sock, int protocol, int family)
+int vcc_create(struct net *net, struct socket *sock, int protocol, int family, int kern)
{
struct sock *sk;
struct atm_vcc *vcc;
@@ -149,7 +149,7 @@ int vcc_create(struct net *net, struct socket *sock, int protocol, int family)
sock->sk = NULL;
if (sock->type == SOCK_STREAM)
return -EINVAL;
- sk = sk_alloc(net, family, GFP_KERNEL, &vcc_proto);
+ sk = sk_alloc(net, family, GFP_KERNEL, &vcc_proto, kern);
if (!sk)
return -ENOMEM;
sock_init_data(sock, sk);
diff --git a/net/atm/common.h b/net/atm/common.h
index 4d6f5b2068ac..959436b87182 100644
--- a/net/atm/common.h
+++ b/net/atm/common.h
@@ -10,7 +10,7 @@
#include <linux/poll.h> /* for poll_table */
-int vcc_create(struct net *net, struct socket *sock, int protocol, int family);
+int vcc_create(struct net *net, struct socket *sock, int protocol, int family, int kern);
int vcc_release(struct socket *sock);
int vcc_connect(struct socket *sock, int itf, short vpi, int vci);
int vcc_recvmsg(struct socket *sock, struct msghdr *msg, size_t size,
diff --git a/net/atm/pvc.c b/net/atm/pvc.c
index ae0324021407..040207ec399f 100644
--- a/net/atm/pvc.c
+++ b/net/atm/pvc.c
@@ -136,7 +136,7 @@ static int pvc_create(struct net *net, struct socket *sock, int protocol,
return -EAFNOSUPPORT;
sock->ops = &pvc_proto_ops;
- return vcc_create(net, sock, protocol, PF_ATMPVC);
+ return vcc_create(net, sock, protocol, PF_ATMPVC, kern);
}
static const struct net_proto_family pvc_family_ops = {
diff --git a/net/atm/svc.c b/net/atm/svc.c
index 1ba23f5018e7..3fa0a9ee98d1 100644
--- a/net/atm/svc.c
+++ b/net/atm/svc.c
@@ -660,7 +660,7 @@ static int svc_create(struct net *net, struct socket *sock, int protocol,
return -EAFNOSUPPORT;
sock->ops = &svc_proto_ops;
- error = vcc_create(net, sock, protocol, AF_ATMSVC);
+ error = vcc_create(net, sock, protocol, AF_ATMSVC, kern);
if (error)
return error;
ATM_SD(sock)->local.sas_family = AF_ATMSVC;
diff --git a/net/ax25/af_ax25.c b/net/ax25/af_ax25.c
index 330c1f4a5a0b..4273533d22b1 100644
--- a/net/ax25/af_ax25.c
+++ b/net/ax25/af_ax25.c
@@ -855,7 +855,7 @@ static int ax25_create(struct net *net, struct socket *sock, int protocol,
return -ESOCKTNOSUPPORT;
}
- sk = sk_alloc(net, PF_AX25, GFP_ATOMIC, &ax25_proto);
+ sk = sk_alloc(net, PF_AX25, GFP_ATOMIC, &ax25_proto, kern);
if (sk == NULL)
return -ENOMEM;
@@ -881,7 +881,7 @@ struct sock *ax25_make_new(struct sock *osk, struct ax25_dev *ax25_dev)
struct sock *sk;
ax25_cb *ax25, *oax25;
- sk = sk_alloc(sock_net(osk), PF_AX25, GFP_ATOMIC, osk->sk_prot);
+ sk = sk_alloc(sock_net(osk), PF_AX25, GFP_ATOMIC, osk->sk_prot, 0);
if (sk == NULL)
return NULL;
diff --git a/net/bluetooth/bnep/sock.c b/net/bluetooth/bnep/sock.c
index bde2bdd9e929..b5116fa9835e 100644
--- a/net/bluetooth/bnep/sock.c
+++ b/net/bluetooth/bnep/sock.c
@@ -202,7 +202,7 @@ static int bnep_sock_create(struct net *net, struct socket *sock, int protocol,
if (sock->type != SOCK_RAW)
return -ESOCKTNOSUPPORT;
- sk = sk_alloc(net, PF_BLUETOOTH, GFP_ATOMIC, &bnep_proto);
+ sk = sk_alloc(net, PF_BLUETOOTH, GFP_ATOMIC, &bnep_proto, kern);
if (!sk)
return -ENOMEM;
diff --git a/net/bluetooth/cmtp/sock.c b/net/bluetooth/cmtp/sock.c
index d82787d417bd..ce86a7bae844 100644
--- a/net/bluetooth/cmtp/sock.c
+++ b/net/bluetooth/cmtp/sock.c
@@ -205,7 +205,7 @@ static int cmtp_sock_create(struct net *net, struct socket *sock, int protocol,
if (sock->type != SOCK_RAW)
return -ESOCKTNOSUPPORT;
- sk = sk_alloc(net, PF_BLUETOOTH, GFP_ATOMIC, &cmtp_proto);
+ sk = sk_alloc(net, PF_BLUETOOTH, GFP_ATOMIC, &cmtp_proto, kern);
if (!sk)
return -ENOMEM;
diff --git a/net/bluetooth/hci_core.c b/net/bluetooth/hci_core.c
index 476709bd068a..c4802f3bd4c5 100644
--- a/net/bluetooth/hci_core.c
+++ b/net/bluetooth/hci_core.c
@@ -1557,7 +1557,8 @@ static int hci_dev_do_close(struct hci_dev *hdev)
{
BT_DBG("%s %p", hdev->name, hdev);
- if (!hci_dev_test_flag(hdev, HCI_UNREGISTER)) {
+ if (!hci_dev_test_flag(hdev, HCI_UNREGISTER) &&
+ test_bit(HCI_UP, &hdev->flags)) {
/* Execute vendor specific shutdown routine */
if (hdev->shutdown)
hdev->shutdown(hdev);
@@ -2853,9 +2854,11 @@ static void le_scan_disable_work_complete(struct hci_dev *hdev, u8 status,
* state. If we were running both LE and BR/EDR inquiry
* simultaneously, and BR/EDR inquiry is already
* finished, stop discovery, otherwise BR/EDR inquiry
- * will stop discovery when finished.
+ * will stop discovery when finished. If we will resolve
+ * remote device name, do not change discovery state.
*/
- if (!test_bit(HCI_INQUIRY, &hdev->flags))
+ if (!test_bit(HCI_INQUIRY, &hdev->flags) &&
+ hdev->discovery.state != DISCOVERY_RESOLVING)
hci_discovery_set_state(hdev,
DISCOVERY_STOPPED);
} else {
diff --git a/net/bluetooth/hci_sock.c b/net/bluetooth/hci_sock.c
index 56f9edbf3d05..5b14dcafcd08 100644
--- a/net/bluetooth/hci_sock.c
+++ b/net/bluetooth/hci_sock.c
@@ -1377,7 +1377,7 @@ static int hci_sock_create(struct net *net, struct socket *sock, int protocol,
sock->ops = &hci_sock_ops;
- sk = sk_alloc(net, PF_BLUETOOTH, GFP_ATOMIC, &hci_sk_proto);
+ sk = sk_alloc(net, PF_BLUETOOTH, GFP_ATOMIC, &hci_sk_proto, kern);
if (!sk)
return -ENOMEM;
diff --git a/net/bluetooth/hidp/sock.c b/net/bluetooth/hidp/sock.c
index cb3fdde1968a..008ba439bd62 100644
--- a/net/bluetooth/hidp/sock.c
+++ b/net/bluetooth/hidp/sock.c
@@ -235,7 +235,7 @@ static int hidp_sock_create(struct net *net, struct socket *sock, int protocol,
if (sock->type != SOCK_RAW)
return -ESOCKTNOSUPPORT;
- sk = sk_alloc(net, PF_BLUETOOTH, GFP_ATOMIC, &hidp_proto);
+ sk = sk_alloc(net, PF_BLUETOOTH, GFP_ATOMIC, &hidp_proto, kern);
if (!sk)
return -ENOMEM;
diff --git a/net/bluetooth/l2cap_sock.c b/net/bluetooth/l2cap_sock.c
index a7278f05eafb..244287706f91 100644
--- a/net/bluetooth/l2cap_sock.c
+++ b/net/bluetooth/l2cap_sock.c
@@ -43,7 +43,7 @@ static struct bt_sock_list l2cap_sk_list = {
static const struct proto_ops l2cap_sock_ops;
static void l2cap_sock_init(struct sock *sk, struct sock *parent);
static struct sock *l2cap_sock_alloc(struct net *net, struct socket *sock,
- int proto, gfp_t prio);
+ int proto, gfp_t prio, int kern);
bool l2cap_is_socket(struct socket *sock)
{
@@ -1193,7 +1193,7 @@ static struct l2cap_chan *l2cap_sock_new_connection_cb(struct l2cap_chan *chan)
}
sk = l2cap_sock_alloc(sock_net(parent), NULL, BTPROTO_L2CAP,
- GFP_ATOMIC);
+ GFP_ATOMIC, 0);
if (!sk) {
release_sock(parent);
return NULL;
@@ -1523,12 +1523,12 @@ static struct proto l2cap_proto = {
};
static struct sock *l2cap_sock_alloc(struct net *net, struct socket *sock,
- int proto, gfp_t prio)
+ int proto, gfp_t prio, int kern)
{
struct sock *sk;
struct l2cap_chan *chan;
- sk = sk_alloc(net, PF_BLUETOOTH, prio, &l2cap_proto);
+ sk = sk_alloc(net, PF_BLUETOOTH, prio, &l2cap_proto, kern);
if (!sk)
return NULL;
@@ -1574,7 +1574,7 @@ static int l2cap_sock_create(struct net *net, struct socket *sock, int protocol,
sock->ops = &l2cap_sock_ops;
- sk = l2cap_sock_alloc(net, sock, protocol, GFP_ATOMIC);
+ sk = l2cap_sock_alloc(net, sock, protocol, GFP_ATOMIC, kern);
if (!sk)
return -ENOMEM;
diff --git a/net/bluetooth/rfcomm/core.c b/net/bluetooth/rfcomm/core.c
index 4fea24275b17..29709fbfd1f5 100644
--- a/net/bluetooth/rfcomm/core.c
+++ b/net/bluetooth/rfcomm/core.c
@@ -200,7 +200,7 @@ static int rfcomm_l2sock_create(struct socket **sock)
BT_DBG("");
- err = sock_create_kern(PF_BLUETOOTH, SOCK_SEQPACKET, BTPROTO_L2CAP, sock);
+ err = sock_create_kern(&init_net, PF_BLUETOOTH, SOCK_SEQPACKET, BTPROTO_L2CAP, sock);
if (!err) {
struct sock *sk = (*sock)->sk;
sk->sk_data_ready = rfcomm_l2data_ready;
diff --git a/net/bluetooth/rfcomm/sock.c b/net/bluetooth/rfcomm/sock.c
index 825e8fb5114b..b2338e971b33 100644
--- a/net/bluetooth/rfcomm/sock.c
+++ b/net/bluetooth/rfcomm/sock.c
@@ -269,12 +269,12 @@ static struct proto rfcomm_proto = {
.obj_size = sizeof(struct rfcomm_pinfo)
};
-static struct sock *rfcomm_sock_alloc(struct net *net, struct socket *sock, int proto, gfp_t prio)
+static struct sock *rfcomm_sock_alloc(struct net *net, struct socket *sock, int proto, gfp_t prio, int kern)
{
struct rfcomm_dlc *d;
struct sock *sk;
- sk = sk_alloc(net, PF_BLUETOOTH, prio, &rfcomm_proto);
+ sk = sk_alloc(net, PF_BLUETOOTH, prio, &rfcomm_proto, kern);
if (!sk)
return NULL;
@@ -324,7 +324,7 @@ static int rfcomm_sock_create(struct net *net, struct socket *sock,
sock->ops = &rfcomm_sock_ops;
- sk = rfcomm_sock_alloc(net, sock, protocol, GFP_ATOMIC);
+ sk = rfcomm_sock_alloc(net, sock, protocol, GFP_ATOMIC, kern);
if (!sk)
return -ENOMEM;
@@ -969,7 +969,7 @@ int rfcomm_connect_ind(struct rfcomm_session *s, u8 channel, struct rfcomm_dlc *
goto done;
}
- sk = rfcomm_sock_alloc(sock_net(parent), NULL, BTPROTO_RFCOMM, GFP_ATOMIC);
+ sk = rfcomm_sock_alloc(sock_net(parent), NULL, BTPROTO_RFCOMM, GFP_ATOMIC, 0);
if (!sk)
goto done;
diff --git a/net/bluetooth/sco.c b/net/bluetooth/sco.c
index 4322c833e748..6b6e59dc54cf 100644
--- a/net/bluetooth/sco.c
+++ b/net/bluetooth/sco.c
@@ -460,11 +460,11 @@ static struct proto sco_proto = {
.obj_size = sizeof(struct sco_pinfo)
};
-static struct sock *sco_sock_alloc(struct net *net, struct socket *sock, int proto, gfp_t prio)
+static struct sock *sco_sock_alloc(struct net *net, struct socket *sock, int proto, gfp_t prio, int kern)
{
struct sock *sk;
- sk = sk_alloc(net, PF_BLUETOOTH, prio, &sco_proto);
+ sk = sk_alloc(net, PF_BLUETOOTH, prio, &sco_proto, kern);
if (!sk)
return NULL;
@@ -501,7 +501,7 @@ static int sco_sock_create(struct net *net, struct socket *sock, int protocol,
sock->ops = &sco_sock_ops;
- sk = sco_sock_alloc(net, sock, protocol, GFP_ATOMIC);
+ sk = sco_sock_alloc(net, sock, protocol, GFP_ATOMIC, kern);
if (!sk)
return -ENOMEM;
@@ -1026,7 +1026,7 @@ static void sco_conn_ready(struct sco_conn *conn)
bh_lock_sock(parent);
sk = sco_sock_alloc(sock_net(parent), NULL,
- BTPROTO_SCO, GFP_ATOMIC);
+ BTPROTO_SCO, GFP_ATOMIC, 0);
if (!sk) {
bh_unlock_sock(parent);
sco_conn_unlock(conn);
diff --git a/net/bridge/br.c b/net/bridge/br.c
index 02c24cf63c34..a1abe4936fe1 100644
--- a/net/bridge/br.c
+++ b/net/bridge/br.c
@@ -121,13 +121,13 @@ static struct notifier_block br_device_notifier = {
.notifier_call = br_device_event
};
-static int br_netdev_switch_event(struct notifier_block *unused,
- unsigned long event, void *ptr)
+static int br_switchdev_event(struct notifier_block *unused,
+ unsigned long event, void *ptr)
{
- struct net_device *dev = netdev_switch_notifier_info_to_dev(ptr);
+ struct net_device *dev = switchdev_notifier_info_to_dev(ptr);
struct net_bridge_port *p;
struct net_bridge *br;
- struct netdev_switch_notifier_fdb_info *fdb_info;
+ struct switchdev_notifier_fdb_info *fdb_info;
int err = NOTIFY_DONE;
rtnl_lock();
@@ -138,14 +138,14 @@ static int br_netdev_switch_event(struct notifier_block *unused,
br = p->br;
switch (event) {
- case NETDEV_SWITCH_FDB_ADD:
+ case SWITCHDEV_FDB_ADD:
fdb_info = ptr;
err = br_fdb_external_learn_add(br, p, fdb_info->addr,
fdb_info->vid);
if (err)
err = notifier_from_errno(err);
break;
- case NETDEV_SWITCH_FDB_DEL:
+ case SWITCHDEV_FDB_DEL:
fdb_info = ptr;
err = br_fdb_external_learn_del(br, p, fdb_info->addr,
fdb_info->vid);
@@ -159,8 +159,8 @@ out:
return err;
}
-static struct notifier_block br_netdev_switch_notifier = {
- .notifier_call = br_netdev_switch_event,
+static struct notifier_block br_switchdev_notifier = {
+ .notifier_call = br_switchdev_event,
};
static void __net_exit br_net_exit(struct net *net)
@@ -214,7 +214,7 @@ static int __init br_init(void)
if (err)
goto err_out3;
- err = register_netdev_switch_notifier(&br_netdev_switch_notifier);
+ err = register_switchdev_notifier(&br_switchdev_notifier);
if (err)
goto err_out4;
@@ -235,7 +235,7 @@ static int __init br_init(void)
return 0;
err_out5:
- unregister_netdev_switch_notifier(&br_netdev_switch_notifier);
+ unregister_switchdev_notifier(&br_switchdev_notifier);
err_out4:
unregister_netdevice_notifier(&br_device_notifier);
err_out3:
@@ -253,7 +253,7 @@ static void __exit br_deinit(void)
{
stp_proto_unregister(&br_stp_proto);
br_netlink_fini();
- unregister_netdev_switch_notifier(&br_netdev_switch_notifier);
+ unregister_switchdev_notifier(&br_switchdev_notifier);
unregister_netdevice_notifier(&br_device_notifier);
brioctl_set(NULL);
unregister_pernet_subsys(&br_net_ops);
diff --git a/net/bridge/br_mdb.c b/net/bridge/br_mdb.c
index 409608960899..e29ad70b3000 100644
--- a/net/bridge/br_mdb.c
+++ b/net/bridge/br_mdb.c
@@ -170,7 +170,7 @@ static int nlmsg_populate_mdb_fill(struct sk_buff *skb,
struct br_port_msg *bpm;
struct nlattr *nest, *nest2;
- nlh = nlmsg_put(skb, pid, seq, type, sizeof(*bpm), NLM_F_MULTI);
+ nlh = nlmsg_put(skb, pid, seq, type, sizeof(*bpm), 0);
if (!nlh)
return -EMSGSIZE;
diff --git a/net/bridge/br_multicast.c b/net/bridge/br_multicast.c
index 4b6722f8f179..d7e103e3538a 100644
--- a/net/bridge/br_multicast.c
+++ b/net/bridge/br_multicast.c
@@ -975,9 +975,6 @@ static int br_ip4_multicast_igmp3_report(struct net_bridge *br,
int err = 0;
__be32 group;
- if (!pskb_may_pull(skb, sizeof(*ih)))
- return -EINVAL;
-
ih = igmpv3_report_hdr(skb);
num = ntohs(ih->ngrec);
len = sizeof(*ih);
@@ -1072,7 +1069,7 @@ static int br_ip6_multicast_mld2_report(struct net_bridge *br,
err = br_ip6_multicast_add_group(br, port, &grec->grec_mca,
vid);
- if (!err)
+ if (err)
break;
}
@@ -1248,25 +1245,14 @@ static int br_ip4_multicast_query(struct net_bridge *br,
max_delay = 10 * HZ;
group = 0;
}
- } else {
- if (!pskb_may_pull(skb, sizeof(struct igmpv3_query))) {
- err = -EINVAL;
- goto out;
- }
-
+ } else if (skb->len >= sizeof(*ih3)) {
ih3 = igmpv3_query_hdr(skb);
if (ih3->nsrcs)
goto out;
max_delay = ih3->code ?
IGMPV3_MRC(ih3->code) * (HZ / IGMP_TIMER_SCALE) : 1;
- }
-
- /* RFC2236+RFC3376 (IGMPv2+IGMPv3) require the multicast link layer
- * all-systems destination addresses (224.0.0.1) for general queries
- */
- if (!group && iph->daddr != htonl(INADDR_ALLHOSTS_GROUP)) {
- err = -EINVAL;
+ } else {
goto out;
}
@@ -1329,12 +1315,6 @@ static int br_ip6_multicast_query(struct net_bridge *br,
(port && port->state == BR_STATE_DISABLED))
goto out;
- /* RFC2710+RFC3810 (MLDv1+MLDv2) require link-local source addresses */
- if (!(ipv6_addr_type(&ip6h->saddr) & IPV6_ADDR_LINKLOCAL)) {
- err = -EINVAL;
- goto out;
- }
-
if (skb->len == sizeof(*mld)) {
if (!pskb_may_pull(skb, sizeof(*mld))) {
err = -EINVAL;
@@ -1358,14 +1338,6 @@ static int br_ip6_multicast_query(struct net_bridge *br,
is_general_query = group && ipv6_addr_any(group);
- /* RFC2710+RFC3810 (MLDv1+MLDv2) require the multicast link layer
- * all-nodes destination address (ff02::1) for general queries
- */
- if (is_general_query && !ipv6_addr_is_ll_all_nodes(&ip6h->daddr)) {
- err = -EINVAL;
- goto out;
- }
-
if (is_general_query) {
saddr.proto = htons(ETH_P_IPV6);
saddr.u.ip6 = ip6h->saddr;
@@ -1557,74 +1529,22 @@ static int br_multicast_ipv4_rcv(struct net_bridge *br,
struct sk_buff *skb,
u16 vid)
{
- struct sk_buff *skb2 = skb;
- const struct iphdr *iph;
+ struct sk_buff *skb_trimmed = NULL;
struct igmphdr *ih;
- unsigned int len;
- unsigned int offset;
int err;
- /* We treat OOM as packet loss for now. */
- if (!pskb_may_pull(skb, sizeof(*iph)))
- return -EINVAL;
-
- iph = ip_hdr(skb);
-
- if (iph->ihl < 5 || iph->version != 4)
- return -EINVAL;
-
- if (!pskb_may_pull(skb, ip_hdrlen(skb)))
- return -EINVAL;
+ err = ip_mc_check_igmp(skb, &skb_trimmed);
- iph = ip_hdr(skb);
-
- if (unlikely(ip_fast_csum((u8 *)iph, iph->ihl)))
- return -EINVAL;
-
- if (iph->protocol != IPPROTO_IGMP) {
- if (!ipv4_is_local_multicast(iph->daddr))
+ if (err == -ENOMSG) {
+ if (!ipv4_is_local_multicast(ip_hdr(skb)->daddr))
BR_INPUT_SKB_CB(skb)->mrouters_only = 1;
return 0;
+ } else if (err < 0) {
+ return err;
}
- len = ntohs(iph->tot_len);
- if (skb->len < len || len < ip_hdrlen(skb))
- return -EINVAL;
-
- if (skb->len > len) {
- skb2 = skb_clone(skb, GFP_ATOMIC);
- if (!skb2)
- return -ENOMEM;
-
- err = pskb_trim_rcsum(skb2, len);
- if (err)
- goto err_out;
- }
-
- len -= ip_hdrlen(skb2);
- offset = skb_network_offset(skb2) + ip_hdrlen(skb2);
- __skb_pull(skb2, offset);
- skb_reset_transport_header(skb2);
-
- err = -EINVAL;
- if (!pskb_may_pull(skb2, sizeof(*ih)))
- goto out;
-
- switch (skb2->ip_summed) {
- case CHECKSUM_COMPLETE:
- if (!csum_fold(skb2->csum))
- break;
- /* fall through */
- case CHECKSUM_NONE:
- skb2->csum = 0;
- if (skb_checksum_complete(skb2))
- goto out;
- }
-
- err = 0;
-
BR_INPUT_SKB_CB(skb)->igmp = 1;
- ih = igmp_hdr(skb2);
+ ih = igmp_hdr(skb);
switch (ih->type) {
case IGMP_HOST_MEMBERSHIP_REPORT:
@@ -1633,21 +1553,19 @@ static int br_multicast_ipv4_rcv(struct net_bridge *br,
err = br_ip4_multicast_add_group(br, port, ih->group, vid);
break;
case IGMPV3_HOST_MEMBERSHIP_REPORT:
- err = br_ip4_multicast_igmp3_report(br, port, skb2, vid);
+ err = br_ip4_multicast_igmp3_report(br, port, skb_trimmed, vid);
break;
case IGMP_HOST_MEMBERSHIP_QUERY:
- err = br_ip4_multicast_query(br, port, skb2, vid);
+ err = br_ip4_multicast_query(br, port, skb_trimmed, vid);
break;
case IGMP_HOST_LEAVE_MESSAGE:
br_ip4_multicast_leave_group(br, port, ih->group, vid);
break;
}
-out:
- __skb_push(skb2, offset);
-err_out:
- if (skb2 != skb)
- kfree_skb(skb2);
+ if (skb_trimmed)
+ kfree_skb(skb_trimmed);
+
return err;
}
@@ -1657,138 +1575,42 @@ static int br_multicast_ipv6_rcv(struct net_bridge *br,
struct sk_buff *skb,
u16 vid)
{
- struct sk_buff *skb2;
- const struct ipv6hdr *ip6h;
- u8 icmp6_type;
- u8 nexthdr;
- __be16 frag_off;
- unsigned int len;
- int offset;
+ struct sk_buff *skb_trimmed = NULL;
+ struct mld_msg *mld;
int err;
- if (!pskb_may_pull(skb, sizeof(*ip6h)))
- return -EINVAL;
-
- ip6h = ipv6_hdr(skb);
-
- /*
- * We're interested in MLD messages only.
- * - Version is 6
- * - MLD has always Router Alert hop-by-hop option
- * - But we do not support jumbrograms.
- */
- if (ip6h->version != 6)
- return 0;
-
- /* Prevent flooding this packet if there is no listener present */
- if (!ipv6_addr_is_ll_all_nodes(&ip6h->daddr))
- BR_INPUT_SKB_CB(skb)->mrouters_only = 1;
+ err = ipv6_mc_check_mld(skb, &skb_trimmed);
- if (ip6h->nexthdr != IPPROTO_HOPOPTS ||
- ip6h->payload_len == 0)
- return 0;
-
- len = ntohs(ip6h->payload_len) + sizeof(*ip6h);
- if (skb->len < len)
- return -EINVAL;
-
- nexthdr = ip6h->nexthdr;
- offset = ipv6_skip_exthdr(skb, sizeof(*ip6h), &nexthdr, &frag_off);
-
- if (offset < 0 || nexthdr != IPPROTO_ICMPV6)
+ if (err == -ENOMSG) {
+ if (!ipv6_addr_is_ll_all_nodes(&ipv6_hdr(skb)->daddr))
+ BR_INPUT_SKB_CB(skb)->mrouters_only = 1;
return 0;
-
- /* Okay, we found ICMPv6 header */
- skb2 = skb_clone(skb, GFP_ATOMIC);
- if (!skb2)
- return -ENOMEM;
-
- err = -EINVAL;
- if (!pskb_may_pull(skb2, offset + sizeof(struct icmp6hdr)))
- goto out;
-
- len -= offset - skb_network_offset(skb2);
-
- __skb_pull(skb2, offset);
- skb_reset_transport_header(skb2);
- skb_postpull_rcsum(skb2, skb_network_header(skb2),
- skb_network_header_len(skb2));
-
- icmp6_type = icmp6_hdr(skb2)->icmp6_type;
-
- switch (icmp6_type) {
- case ICMPV6_MGM_QUERY:
- case ICMPV6_MGM_REPORT:
- case ICMPV6_MGM_REDUCTION:
- case ICMPV6_MLD2_REPORT:
- break;
- default:
- err = 0;
- goto out;
- }
-
- /* Okay, we found MLD message. Check further. */
- if (skb2->len > len) {
- err = pskb_trim_rcsum(skb2, len);
- if (err)
- goto out;
- err = -EINVAL;
- }
-
- ip6h = ipv6_hdr(skb2);
-
- switch (skb2->ip_summed) {
- case CHECKSUM_COMPLETE:
- if (!csum_ipv6_magic(&ip6h->saddr, &ip6h->daddr, skb2->len,
- IPPROTO_ICMPV6, skb2->csum))
- break;
- /*FALLTHROUGH*/
- case CHECKSUM_NONE:
- skb2->csum = ~csum_unfold(csum_ipv6_magic(&ip6h->saddr,
- &ip6h->daddr,
- skb2->len,
- IPPROTO_ICMPV6, 0));
- if (__skb_checksum_complete(skb2))
- goto out;
+ } else if (err < 0) {
+ return err;
}
- err = 0;
-
BR_INPUT_SKB_CB(skb)->igmp = 1;
+ mld = (struct mld_msg *)skb_transport_header(skb);
- switch (icmp6_type) {
+ switch (mld->mld_type) {
case ICMPV6_MGM_REPORT:
- {
- struct mld_msg *mld;
- if (!pskb_may_pull(skb2, sizeof(*mld))) {
- err = -EINVAL;
- goto out;
- }
- mld = (struct mld_msg *)skb_transport_header(skb2);
BR_INPUT_SKB_CB(skb)->mrouters_only = 1;
err = br_ip6_multicast_add_group(br, port, &mld->mld_mca, vid);
break;
- }
case ICMPV6_MLD2_REPORT:
- err = br_ip6_multicast_mld2_report(br, port, skb2, vid);
+ err = br_ip6_multicast_mld2_report(br, port, skb_trimmed, vid);
break;
case ICMPV6_MGM_QUERY:
- err = br_ip6_multicast_query(br, port, skb2, vid);
+ err = br_ip6_multicast_query(br, port, skb_trimmed, vid);
break;
case ICMPV6_MGM_REDUCTION:
- {
- struct mld_msg *mld;
- if (!pskb_may_pull(skb2, sizeof(*mld))) {
- err = -EINVAL;
- goto out;
- }
- mld = (struct mld_msg *)skb_transport_header(skb2);
br_ip6_multicast_leave_group(br, port, &mld->mld_mca, vid);
- }
+ break;
}
-out:
- kfree_skb(skb2);
+ if (skb_trimmed)
+ kfree_skb(skb_trimmed);
+
return err;
}
#endif
diff --git a/net/bridge/br_netfilter.c b/net/bridge/br_netfilter.c
index ab55e2472beb..46660a28feef 100644
--- a/net/bridge/br_netfilter.c
+++ b/net/bridge/br_netfilter.c
@@ -37,10 +37,6 @@
#include <net/route.h>
#include <net/netfilter/br_netfilter.h>
-#if IS_ENABLED(CONFIG_NF_CONNTRACK)
-#include <net/netfilter/nf_conntrack.h>
-#endif
-
#include <asm/uaccess.h>
#include "br_private.h"
#ifdef CONFIG_SYSCTL
@@ -129,6 +125,14 @@ static struct nf_bridge_info *nf_bridge_info_get(const struct sk_buff *skb)
return skb->nf_bridge;
}
+static void nf_bridge_info_free(struct sk_buff *skb)
+{
+ if (skb->nf_bridge) {
+ nf_bridge_put(skb->nf_bridge);
+ skb->nf_bridge = NULL;
+ }
+}
+
static inline struct rtable *bridge_parent_rtable(const struct net_device *dev)
{
struct net_bridge_port *port;
@@ -350,24 +354,15 @@ free_skb:
return 0;
}
-static bool dnat_took_place(const struct sk_buff *skb)
+static bool daddr_was_changed(const struct sk_buff *skb,
+ const struct nf_bridge_info *nf_bridge)
{
-#if IS_ENABLED(CONFIG_NF_CONNTRACK)
- enum ip_conntrack_info ctinfo;
- struct nf_conn *ct;
-
- ct = nf_ct_get(skb, &ctinfo);
- if (!ct || nf_ct_is_untracked(ct))
- return false;
-
- return test_bit(IPS_DST_NAT_BIT, &ct->status);
-#else
- return false;
-#endif
+ return ip_hdr(skb)->daddr != nf_bridge->ipv4_daddr;
}
/* This requires some explaining. If DNAT has taken place,
* we will need to fix up the destination Ethernet address.
+ * This is also true when SNAT takes place (for the reply direction).
*
* There are two cases to consider:
* 1. The packet was DNAT'ed to a device in the same bridge
@@ -421,7 +416,7 @@ static int br_nf_pre_routing_finish(struct sock *sk, struct sk_buff *skb)
nf_bridge->pkt_otherhost = false;
}
nf_bridge->mask ^= BRNF_NF_BRIDGE_PREROUTING;
- if (dnat_took_place(skb)) {
+ if (daddr_was_changed(skb, nf_bridge)) {
if ((err = ip_route_input(skb, iph->daddr, iph->saddr, iph->tos, dev))) {
struct in_device *in_dev = __in_dev_get_rcu(dev);
@@ -632,6 +627,7 @@ static unsigned int br_nf_pre_routing(const struct nf_hook_ops *ops,
struct sk_buff *skb,
const struct nf_hook_state *state)
{
+ struct nf_bridge_info *nf_bridge;
struct net_bridge_port *p;
struct net_bridge *br;
__u32 len = nf_bridge_encap_header_len(skb);
@@ -669,6 +665,9 @@ static unsigned int br_nf_pre_routing(const struct nf_hook_ops *ops,
if (!setup_pre_routing(skb))
return NF_DROP;
+ nf_bridge = nf_bridge_info_get(skb);
+ nf_bridge->ipv4_daddr = ip_hdr(skb)->daddr;
+
skb->protocol = htons(ETH_P_IP);
NF_HOOK(NFPROTO_IPV4, NF_INET_PRE_ROUTING, state->sk, skb,
@@ -841,17 +840,39 @@ static int br_nf_push_frag_xmit(struct sock *sk, struct sk_buff *skb)
skb_copy_to_linear_data_offset(skb, -data->size, data->mac, data->size);
__skb_push(skb, data->encap_size);
+ nf_bridge_info_free(skb);
return br_dev_queue_push_xmit(sk, skb);
}
+static int br_nf_ip_fragment(struct sock *sk, struct sk_buff *skb,
+ int (*output)(struct sock *, struct sk_buff *))
+{
+ unsigned int mtu = ip_skb_dst_mtu(skb);
+ struct iphdr *iph = ip_hdr(skb);
+ struct rtable *rt = skb_rtable(skb);
+ struct net_device *dev = rt->dst.dev;
+
+ if (unlikely(((iph->frag_off & htons(IP_DF)) && !skb->ignore_df) ||
+ (IPCB(skb)->frag_max_size &&
+ IPCB(skb)->frag_max_size > mtu))) {
+ IP_INC_STATS(dev_net(dev), IPSTATS_MIB_FRAGFAILS);
+ kfree_skb(skb);
+ return -EMSGSIZE;
+ }
+
+ return ip_do_fragment(sk, skb, output);
+}
+
static int br_nf_dev_queue_xmit(struct sock *sk, struct sk_buff *skb)
{
int ret;
int frag_max_size;
unsigned int mtu_reserved;
- if (skb_is_gso(skb) || skb->protocol != htons(ETH_P_IP))
+ if (skb_is_gso(skb) || skb->protocol != htons(ETH_P_IP)) {
+ nf_bridge_info_free(skb);
return br_dev_queue_push_xmit(sk, skb);
+ }
mtu_reserved = nf_bridge_mtu_reduction(skb);
/* This is wrong! We should preserve the original fragment
@@ -875,8 +896,9 @@ static int br_nf_dev_queue_xmit(struct sock *sk, struct sk_buff *skb)
skb_copy_from_linear_data_offset(skb, -data->size, data->mac,
data->size);
- ret = ip_fragment(sk, skb, br_nf_push_frag_xmit);
+ ret = br_nf_ip_fragment(sk, skb, br_nf_push_frag_xmit);
} else {
+ nf_bridge_info_free(skb);
ret = br_dev_queue_push_xmit(sk, skb);
}
@@ -885,7 +907,8 @@ static int br_nf_dev_queue_xmit(struct sock *sk, struct sk_buff *skb)
#else
static int br_nf_dev_queue_xmit(struct sock *sk, struct sk_buff *skb)
{
- return br_dev_queue_push_xmit(sk, skb);
+ nf_bridge_info_free(skb);
+ return br_dev_queue_push_xmit(sk, skb);
}
#endif
@@ -973,6 +996,8 @@ static void br_nf_pre_routing_finish_bridge_slow(struct sk_buff *skb)
nf_bridge->neigh_header,
ETH_HLEN - ETH_ALEN);
skb->dev = nf_bridge->physindev;
+
+ nf_bridge->physoutdev = NULL;
br_handle_frame_finish(NULL, skb);
}
diff --git a/net/bridge/br_netlink.c b/net/bridge/br_netlink.c
index 0e4ddb81610d..6b67ed3831de 100644
--- a/net/bridge/br_netlink.c
+++ b/net/bridge/br_netlink.c
@@ -394,7 +394,7 @@ errout:
* Dump information about all ports, in response to GETLINK
*/
int br_getlink(struct sk_buff *skb, u32 pid, u32 seq,
- struct net_device *dev, u32 filter_mask)
+ struct net_device *dev, u32 filter_mask, int nlflags)
{
struct net_bridge_port *port = br_port_get_rtnl(dev);
@@ -402,7 +402,7 @@ int br_getlink(struct sk_buff *skb, u32 pid, u32 seq,
!(filter_mask & RTEXT_FILTER_BRVLAN_COMPRESSED))
return 0;
- return br_fill_ifinfo(skb, port, pid, seq, RTM_NEWLINK, NLM_F_MULTI,
+ return br_fill_ifinfo(skb, port, pid, seq, RTM_NEWLINK, nlflags,
filter_mask, dev);
}
@@ -586,7 +586,7 @@ int br_setlink(struct net_device *dev, struct nlmsghdr *nlh, u16 flags)
struct nlattr *afspec;
struct net_bridge_port *p;
struct nlattr *tb[IFLA_BRPORT_MAX + 1];
- int err = 0, ret_offload = 0;
+ int err = 0;
protinfo = nlmsg_find_attr(nlh, sizeof(struct ifinfomsg), IFLA_PROTINFO);
afspec = nlmsg_find_attr(nlh, sizeof(struct ifinfomsg), IFLA_AF_SPEC);
@@ -628,16 +628,6 @@ int br_setlink(struct net_device *dev, struct nlmsghdr *nlh, u16 flags)
afspec, RTM_SETLINK);
}
- if (p && !(flags & BRIDGE_FLAGS_SELF)) {
- /* set bridge attributes in hardware if supported
- */
- ret_offload = netdev_switch_port_bridge_setlink(dev, nlh,
- flags);
- if (ret_offload && ret_offload != -EOPNOTSUPP)
- br_warn(p->br, "error setting attrs on port %u(%s)\n",
- (unsigned int)p->port_no, p->dev->name);
- }
-
if (err == 0)
br_ifinfo_notify(RTM_NEWLINK, p);
out:
@@ -649,7 +639,7 @@ int br_dellink(struct net_device *dev, struct nlmsghdr *nlh, u16 flags)
{
struct nlattr *afspec;
struct net_bridge_port *p;
- int err = 0, ret_offload = 0;
+ int err = 0;
afspec = nlmsg_find_attr(nlh, sizeof(struct ifinfomsg), IFLA_AF_SPEC);
if (!afspec)
@@ -668,16 +658,6 @@ int br_dellink(struct net_device *dev, struct nlmsghdr *nlh, u16 flags)
*/
br_ifinfo_notify(RTM_NEWLINK, p);
- if (p && !(flags & BRIDGE_FLAGS_SELF)) {
- /* del bridge attributes in hardware
- */
- ret_offload = netdev_switch_port_bridge_dellink(dev, nlh,
- flags);
- if (ret_offload && ret_offload != -EOPNOTSUPP)
- br_warn(p->br, "error deleting attrs on port %u (%s)\n",
- (unsigned int)p->port_no, p->dev->name);
- }
-
return err;
}
static int br_validate(struct nlattr *tb[], struct nlattr *data[])
diff --git a/net/bridge/br_private.h b/net/bridge/br_private.h
index 6ca0251cb478..1f36fa70639b 100644
--- a/net/bridge/br_private.h
+++ b/net/bridge/br_private.h
@@ -33,8 +33,8 @@
/* Control of forwarding link local multicast */
#define BR_GROUPFWD_DEFAULT 0
-/* Don't allow forwarding control protocols like STP and LLDP */
-#define BR_GROUPFWD_RESTRICTED 0x4007u
+/* Don't allow forwarding of control protocols like STP, MAC PAUSE and LACP */
+#define BR_GROUPFWD_RESTRICTED 0x0007u
/* The Nearest Customer Bridge Group Address, 01-80-C2-00-00-[00,0B,0C,0D,0F] */
#define BR_GROUPFWD_8021AD 0xB801u
@@ -828,7 +828,7 @@ void br_ifinfo_notify(int event, struct net_bridge_port *port);
int br_setlink(struct net_device *dev, struct nlmsghdr *nlmsg, u16 flags);
int br_dellink(struct net_device *dev, struct nlmsghdr *nlmsg, u16 flags);
int br_getlink(struct sk_buff *skb, u32 pid, u32 seq, struct net_device *dev,
- u32 filter_mask);
+ u32 filter_mask, int nlflags);
#ifdef CONFIG_SYSFS
/* br_sysfs_if.c */
diff --git a/net/bridge/br_stp.c b/net/bridge/br_stp.c
index fb3ebe615513..45f1ff113af9 100644
--- a/net/bridge/br_stp.c
+++ b/net/bridge/br_stp.c
@@ -39,10 +39,14 @@ void br_log_state(const struct net_bridge_port *p)
void br_set_state(struct net_bridge_port *p, unsigned int state)
{
+ struct switchdev_attr attr = {
+ .id = SWITCHDEV_ATTR_PORT_STP_STATE,
+ .u.stp_state = state,
+ };
int err;
p->state = state;
- err = netdev_switch_port_stp_update(p->dev, state);
+ err = switchdev_port_attr_set(p->dev, &attr);
if (err && err != -EOPNOTSUPP)
br_warn(p->br, "error setting offload STP state on port %u(%s)\n",
(unsigned int) p->port_no, p->dev->name);
diff --git a/net/bridge/br_stp_timer.c b/net/bridge/br_stp_timer.c
index 4fcaa67750fd..7caf7fae2d5b 100644
--- a/net/bridge/br_stp_timer.c
+++ b/net/bridge/br_stp_timer.c
@@ -97,7 +97,9 @@ static void br_forward_delay_timer_expired(unsigned long arg)
netif_carrier_on(br->dev);
}
br_log_state(p);
+ rcu_read_lock();
br_ifinfo_notify(RTM_NEWLINK, p);
+ rcu_read_unlock();
spin_unlock(&br->lock);
}
diff --git a/net/bridge/netfilter/ebt_stp.c b/net/bridge/netfilter/ebt_stp.c
index 071d87214dde..0c40570069ba 100644
--- a/net/bridge/netfilter/ebt_stp.c
+++ b/net/bridge/netfilter/ebt_stp.c
@@ -164,8 +164,10 @@ static int ebt_stp_mt_check(const struct xt_mtchk_param *par)
!(info->bitmask & EBT_STP_MASK))
return -EINVAL;
/* Make sure the match only receives stp frames */
- if (!ether_addr_equal(e->destmac, bridge_ula) ||
- !ether_addr_equal(e->destmsk, msk) || !(e->bitmask & EBT_DESTMAC))
+ if (!par->nft_compat &&
+ (!ether_addr_equal(e->destmac, bridge_ula) ||
+ !ether_addr_equal(e->destmsk, msk) ||
+ !(e->bitmask & EBT_DESTMAC)))
return -EINVAL;
return 0;
diff --git a/net/bridge/netfilter/ebtables.c b/net/bridge/netfilter/ebtables.c
index 91180a7fc943..d5aba394ff6f 100644
--- a/net/bridge/netfilter/ebtables.c
+++ b/net/bridge/netfilter/ebtables.c
@@ -139,7 +139,7 @@ ebt_basic_match(const struct ebt_entry *e, const struct sk_buff *skb,
ethproto = h->h_proto;
if (e->bitmask & EBT_802_3) {
- if (FWINV2(ntohs(ethproto) >= ETH_P_802_3_MIN, EBT_IPROTO))
+ if (FWINV2(eth_proto_is_802_3(ethproto), EBT_IPROTO))
return 1;
} else if (!(e->bitmask & EBT_NOPROTO) &&
FWINV2(e->ethproto != ethproto, EBT_IPROTO))
@@ -1117,6 +1117,8 @@ static int do_replace(struct net *net, const void __user *user,
return -ENOMEM;
if (tmp.num_counters >= INT_MAX / sizeof(struct ebt_counter))
return -ENOMEM;
+ if (tmp.num_counters == 0)
+ return -EINVAL;
tmp.name[sizeof(tmp.name) - 1] = 0;
@@ -2159,6 +2161,8 @@ static int compat_copy_ebt_replace_from_user(struct ebt_replace *repl,
return -ENOMEM;
if (tmp.num_counters >= INT_MAX / sizeof(struct ebt_counter))
return -ENOMEM;
+ if (tmp.num_counters == 0)
+ return -EINVAL;
memcpy(repl, &tmp, offsetof(struct ebt_replace, hook_entry));
diff --git a/net/caif/caif_socket.c b/net/caif/caif_socket.c
index 4ec0c803aef1..78a04ebb113c 100644
--- a/net/caif/caif_socket.c
+++ b/net/caif/caif_socket.c
@@ -1047,7 +1047,7 @@ static int caif_create(struct net *net, struct socket *sock, int protocol,
* is really not used at all in the net/core or socket.c but the
* initialization makes sure that sock->state is not uninitialized.
*/
- sk = sk_alloc(net, PF_CAIF, GFP_KERNEL, &prot);
+ sk = sk_alloc(net, PF_CAIF, GFP_KERNEL, &prot, kern);
if (!sk)
return -ENOMEM;
diff --git a/net/can/af_can.c b/net/can/af_can.c
index 32d710eaf1fc..d4d404bdfc9a 100644
--- a/net/can/af_can.c
+++ b/net/can/af_can.c
@@ -179,7 +179,7 @@ static int can_create(struct net *net, struct socket *sock, int protocol,
sock->ops = cp->ops;
- sk = sk_alloc(net, PF_CAN, GFP_KERNEL, cp->prot);
+ sk = sk_alloc(net, PF_CAN, GFP_KERNEL, cp->prot, kern);
if (!sk) {
err = -ENOMEM;
goto errout;
diff --git a/net/ceph/messenger.c b/net/ceph/messenger.c
index 967080a9f043..073262fea6dd 100644
--- a/net/ceph/messenger.c
+++ b/net/ceph/messenger.c
@@ -480,8 +480,8 @@ static int ceph_tcp_connect(struct ceph_connection *con)
int ret;
BUG_ON(con->sock);
- ret = sock_create_kern(con->peer_addr.in_addr.ss_family, SOCK_STREAM,
- IPPROTO_TCP, &sock);
+ ret = sock_create_kern(&init_net, con->peer_addr.in_addr.ss_family,
+ SOCK_STREAM, IPPROTO_TCP, &sock);
if (ret)
return ret;
sock->sk->sk_allocation = GFP_NOFS;
diff --git a/net/core/dev.c b/net/core/dev.c
index c7ba0388f1be..594163d0c6eb 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -135,6 +135,7 @@
#include <linux/if_macvlan.h>
#include <linux/errqueue.h>
#include <linux/hrtimer.h>
+#include <linux/netfilter_ingress.h>
#include "net-sysfs.h"
@@ -1630,7 +1631,7 @@ int call_netdevice_notifiers(unsigned long val, struct net_device *dev)
}
EXPORT_SYMBOL(call_netdevice_notifiers);
-#ifdef CONFIG_NET_CLS_ACT
+#ifdef CONFIG_NET_INGRESS
static struct static_key ingress_needed __read_mostly;
void net_inc_ingress_queue(void)
@@ -2350,6 +2351,34 @@ void netif_device_attach(struct net_device *dev)
}
EXPORT_SYMBOL(netif_device_attach);
+/*
+ * Returns a Tx hash based on the given packet descriptor a Tx queues' number
+ * to be used as a distribution range.
+ */
+u16 __skb_tx_hash(const struct net_device *dev, struct sk_buff *skb,
+ unsigned int num_tx_queues)
+{
+ u32 hash;
+ u16 qoffset = 0;
+ u16 qcount = num_tx_queues;
+
+ if (skb_rx_queue_recorded(skb)) {
+ hash = skb_get_rx_queue(skb);
+ while (unlikely(hash >= num_tx_queues))
+ hash -= num_tx_queues;
+ return hash;
+ }
+
+ if (dev->num_tc) {
+ u8 tc = netdev_get_prio_tc_map(dev, skb->priority);
+ qoffset = dev->tc_to_txq[tc].offset;
+ qcount = dev->tc_to_txq[tc].count;
+ }
+
+ return (u16) reciprocal_scale(skb_get_hash(skb), qcount) + qoffset;
+}
+EXPORT_SYMBOL(__skb_tx_hash);
+
static void skb_warn_bad_offload(const struct sk_buff *skb)
{
static const netdev_features_t null_features = 0;
@@ -2908,6 +2937,84 @@ int dev_loopback_xmit(struct sock *sk, struct sk_buff *skb)
}
EXPORT_SYMBOL(dev_loopback_xmit);
+static inline int get_xps_queue(struct net_device *dev, struct sk_buff *skb)
+{
+#ifdef CONFIG_XPS
+ struct xps_dev_maps *dev_maps;
+ struct xps_map *map;
+ int queue_index = -1;
+
+ rcu_read_lock();
+ dev_maps = rcu_dereference(dev->xps_maps);
+ if (dev_maps) {
+ map = rcu_dereference(
+ dev_maps->cpu_map[skb->sender_cpu - 1]);
+ if (map) {
+ if (map->len == 1)
+ queue_index = map->queues[0];
+ else
+ queue_index = map->queues[reciprocal_scale(skb_get_hash(skb),
+ map->len)];
+ if (unlikely(queue_index >= dev->real_num_tx_queues))
+ queue_index = -1;
+ }
+ }
+ rcu_read_unlock();
+
+ return queue_index;
+#else
+ return -1;
+#endif
+}
+
+static u16 __netdev_pick_tx(struct net_device *dev, struct sk_buff *skb)
+{
+ struct sock *sk = skb->sk;
+ int queue_index = sk_tx_queue_get(sk);
+
+ if (queue_index < 0 || skb->ooo_okay ||
+ queue_index >= dev->real_num_tx_queues) {
+ int new_index = get_xps_queue(dev, skb);
+ if (new_index < 0)
+ new_index = skb_tx_hash(dev, skb);
+
+ if (queue_index != new_index && sk &&
+ rcu_access_pointer(sk->sk_dst_cache))
+ sk_tx_queue_set(sk, new_index);
+
+ queue_index = new_index;
+ }
+
+ return queue_index;
+}
+
+struct netdev_queue *netdev_pick_tx(struct net_device *dev,
+ struct sk_buff *skb,
+ void *accel_priv)
+{
+ int queue_index = 0;
+
+#ifdef CONFIG_XPS
+ if (skb->sender_cpu == 0)
+ skb->sender_cpu = raw_smp_processor_id() + 1;
+#endif
+
+ if (dev->real_num_tx_queues != 1) {
+ const struct net_device_ops *ops = dev->netdev_ops;
+ if (ops->ndo_select_queue)
+ queue_index = ops->ndo_select_queue(dev, skb, accel_priv,
+ __netdev_pick_tx);
+ else
+ queue_index = __netdev_pick_tx(dev, skb);
+
+ if (!accel_priv)
+ queue_index = netdev_cap_txqueue(dev, queue_index);
+ }
+
+ skb_set_queue_mapping(skb, queue_index);
+ return netdev_get_tx_queue(dev, queue_index);
+}
+
/**
* __dev_queue_xmit - transmit a buffer
* @skb: buffer to transmit
@@ -3520,66 +3627,47 @@ int (*br_fdb_test_addr_hook)(struct net_device *dev,
EXPORT_SYMBOL_GPL(br_fdb_test_addr_hook);
#endif
-#ifdef CONFIG_NET_CLS_ACT
-/* TODO: Maybe we should just force sch_ingress to be compiled in
- * when CONFIG_NET_CLS_ACT is? otherwise some useless instructions
- * a compare and 2 stores extra right now if we dont have it on
- * but have CONFIG_NET_CLS_ACT
- * NOTE: This doesn't stop any functionality; if you dont have
- * the ingress scheduler, you just can't add policies on ingress.
- *
- */
-static int ing_filter(struct sk_buff *skb, struct netdev_queue *rxq)
-{
- struct net_device *dev = skb->dev;
- u32 ttl = G_TC_RTTL(skb->tc_verd);
- int result = TC_ACT_OK;
- struct Qdisc *q;
-
- if (unlikely(MAX_RED_LOOP < ttl++)) {
- net_warn_ratelimited("Redir loop detected Dropping packet (%d->%d)\n",
- skb->skb_iif, dev->ifindex);
- return TC_ACT_SHOT;
- }
-
- skb->tc_verd = SET_TC_RTTL(skb->tc_verd, ttl);
- skb->tc_verd = SET_TC_AT(skb->tc_verd, AT_INGRESS);
-
- q = rcu_dereference(rxq->qdisc);
- if (q != &noop_qdisc) {
- spin_lock(qdisc_lock(q));
- if (likely(!test_bit(__QDISC_STATE_DEACTIVATED, &q->state)))
- result = qdisc_enqueue_root(skb, q);
- spin_unlock(qdisc_lock(q));
- }
-
- return result;
-}
-
static inline struct sk_buff *handle_ing(struct sk_buff *skb,
struct packet_type **pt_prev,
int *ret, struct net_device *orig_dev)
{
- struct netdev_queue *rxq = rcu_dereference(skb->dev->ingress_queue);
+#ifdef CONFIG_NET_CLS_ACT
+ struct tcf_proto *cl = rcu_dereference_bh(skb->dev->ingress_cl_list);
+ struct tcf_result cl_res;
- if (!rxq || rcu_access_pointer(rxq->qdisc) == &noop_qdisc)
+ /* If there's at least one ingress present somewhere (so
+ * we get here via enabled static key), remaining devices
+ * that are not configured with an ingress qdisc will bail
+ * out here.
+ */
+ if (!cl)
return skb;
-
if (*pt_prev) {
*ret = deliver_skb(skb, *pt_prev, orig_dev);
*pt_prev = NULL;
}
- switch (ing_filter(skb, rxq)) {
+ qdisc_skb_cb(skb)->pkt_len = skb->len;
+ skb->tc_verd = SET_TC_AT(skb->tc_verd, AT_INGRESS);
+ qdisc_bstats_update_cpu(cl->q, skb);
+
+ switch (tc_classify(skb, cl, &cl_res)) {
+ case TC_ACT_OK:
+ case TC_ACT_RECLASSIFY:
+ skb->tc_index = TC_H_MIN(cl_res.classid);
+ break;
case TC_ACT_SHOT:
+ qdisc_qstats_drop_cpu(cl->q);
case TC_ACT_STOLEN:
+ case TC_ACT_QUEUED:
kfree_skb(skb);
return NULL;
+ default:
+ break;
}
-
+#endif /* CONFIG_NET_CLS_ACT */
return skb;
}
-#endif
/**
* netdev_rx_handler_register - register receive handler
@@ -3652,6 +3740,22 @@ static bool skb_pfmemalloc_protocol(struct sk_buff *skb)
}
}
+static inline int nf_ingress(struct sk_buff *skb, struct packet_type **pt_prev,
+ int *ret, struct net_device *orig_dev)
+{
+#ifdef CONFIG_NETFILTER_INGRESS
+ if (nf_hook_ingress_active(skb)) {
+ if (*pt_prev) {
+ *ret = deliver_skb(skb, *pt_prev, orig_dev);
+ *pt_prev = NULL;
+ }
+
+ return nf_hook_ingress(skb);
+ }
+#endif /* CONFIG_NETFILTER_INGRESS */
+ return 0;
+}
+
static int __netif_receive_skb_core(struct sk_buff *skb, bool pfmemalloc)
{
struct packet_type *ptype, *pt_prev;
@@ -3711,13 +3815,17 @@ another_round:
}
skip_taps:
-#ifdef CONFIG_NET_CLS_ACT
+#ifdef CONFIG_NET_INGRESS
if (static_key_false(&ingress_needed)) {
skb = handle_ing(skb, &pt_prev, &ret, orig_dev);
if (!skb)
goto unlock;
- }
+ if (nf_ingress(skb, &pt_prev, &ret, orig_dev) < 0)
+ goto unlock;
+ }
+#endif
+#ifdef CONFIG_NET_CLS_ACT
skb->tc_verd = 0;
ncls:
#endif
@@ -5209,7 +5317,7 @@ static int __netdev_upper_dev_link(struct net_device *dev,
if (__netdev_find_adj(upper_dev, dev, &upper_dev->all_adj_list.upper))
return -EBUSY;
- if (__netdev_find_adj(dev, upper_dev, &dev->all_adj_list.upper))
+ if (__netdev_find_adj(dev, upper_dev, &dev->adj_list.upper))
return -EEXIST;
if (master && netdev_master_upper_dev_get(dev))
@@ -6320,6 +6428,17 @@ static int netif_alloc_netdev_queues(struct net_device *dev)
return 0;
}
+void netif_tx_stop_all_queues(struct net_device *dev)
+{
+ unsigned int i;
+
+ for (i = 0; i < dev->num_tx_queues; i++) {
+ struct netdev_queue *txq = netdev_get_tx_queue(dev, i);
+ netif_tx_stop_queue(txq);
+ }
+}
+EXPORT_SYMBOL(netif_tx_stop_all_queues);
+
/**
* register_netdevice - register a network device
* @dev: device to register
@@ -6869,6 +6988,9 @@ struct net_device *alloc_netdev_mqs(int sizeof_priv, const char *name,
dev->group = INIT_NETDEV_GROUP;
if (!dev->ethtool_ops)
dev->ethtool_ops = &default_ethtool_ops;
+
+ nf_hook_ingress_init(dev);
+
return dev;
free_all:
diff --git a/net/core/ethtool.c b/net/core/ethtool.c
index 1d00b8922902..4f6a17ef0710 100644
--- a/net/core/ethtool.c
+++ b/net/core/ethtool.c
@@ -98,7 +98,6 @@ static const char netdev_features_strings[NETDEV_FEATURE_COUNT][ETH_GSTRING_LEN]
[NETIF_F_RXALL_BIT] = "rx-all",
[NETIF_F_HW_L2FW_DOFFLOAD_BIT] = "l2-fwd-offload",
[NETIF_F_BUSY_POLL_BIT] = "busy-poll",
- [NETIF_F_HW_SWITCH_OFFLOAD_BIT] = "hw-switch-offload",
};
static const char
@@ -359,7 +358,15 @@ static int ethtool_get_settings(struct net_device *dev, void __user *useraddr)
int err;
struct ethtool_cmd cmd;
- err = __ethtool_get_settings(dev, &cmd);
+ if (!dev->ethtool_ops->get_settings)
+ return -EOPNOTSUPP;
+
+ if (copy_from_user(&cmd, useraddr, sizeof(cmd)))
+ return -EFAULT;
+
+ cmd.cmd = ETHTOOL_GSET;
+
+ err = dev->ethtool_ops->get_settings(dev, &cmd);
if (err < 0)
return err;
diff --git a/net/core/filter.c b/net/core/filter.c
index bf831a85c315..3adcca6f17a4 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -36,6 +36,7 @@
#include <net/netlink.h>
#include <linux/skbuff.h>
#include <net/sock.h>
+#include <net/flow_dissector.h>
#include <linux/errno.h>
#include <linux/timer.h>
#include <asm/uaccess.h>
@@ -355,8 +356,8 @@ static bool convert_bpf_extensions(struct sock_filter *fp,
* for socket filters: ctx == 'struct sk_buff *', for seccomp:
* ctx == 'struct seccomp_data *'.
*/
-int bpf_convert_filter(struct sock_filter *prog, int len,
- struct bpf_insn *new_prog, int *new_len)
+static int bpf_convert_filter(struct sock_filter *prog, int len,
+ struct bpf_insn *new_prog, int *new_len)
{
int new_flen = 0, pass = 0, target, i;
struct bpf_insn *new_insn;
@@ -371,7 +372,8 @@ int bpf_convert_filter(struct sock_filter *prog, int len,
return -EINVAL;
if (new_prog) {
- addrs = kcalloc(len, sizeof(*addrs), GFP_KERNEL);
+ addrs = kcalloc(len, sizeof(*addrs),
+ GFP_KERNEL | __GFP_NOWARN);
if (!addrs)
return -ENOMEM;
}
@@ -751,7 +753,8 @@ static bool chk_code_allowed(u16 code_to_probe)
*
* Returns 0 if the rule set is legal or -EINVAL if not.
*/
-int bpf_check_classic(const struct sock_filter *filter, unsigned int flen)
+static int bpf_check_classic(const struct sock_filter *filter,
+ unsigned int flen)
{
bool anc_found;
int pc;
@@ -825,7 +828,6 @@ int bpf_check_classic(const struct sock_filter *filter, unsigned int flen)
return -EINVAL;
}
-EXPORT_SYMBOL(bpf_check_classic);
static int bpf_prog_store_orig_filter(struct bpf_prog *fp,
const struct sock_fprog *fprog)
@@ -839,7 +841,9 @@ static int bpf_prog_store_orig_filter(struct bpf_prog *fp,
fkprog = fp->orig_prog;
fkprog->len = fprog->len;
- fkprog->filter = kmemdup(fp->insns, fsize, GFP_KERNEL);
+
+ fkprog->filter = kmemdup(fp->insns, fsize,
+ GFP_KERNEL | __GFP_NOWARN);
if (!fkprog->filter) {
kfree(fp->orig_prog);
return -ENOMEM;
@@ -941,7 +945,7 @@ static struct bpf_prog *bpf_migrate_filter(struct bpf_prog *fp)
* pass. At this time, the user BPF is stored in fp->insns.
*/
old_prog = kmemdup(fp->insns, old_len * sizeof(struct sock_filter),
- GFP_KERNEL);
+ GFP_KERNEL | __GFP_NOWARN);
if (!old_prog) {
err = -ENOMEM;
goto out_err;
@@ -988,7 +992,8 @@ out_err:
return ERR_PTR(err);
}
-static struct bpf_prog *bpf_prepare_filter(struct bpf_prog *fp)
+static struct bpf_prog *bpf_prepare_filter(struct bpf_prog *fp,
+ bpf_aux_classic_check_t trans)
{
int err;
@@ -1001,6 +1006,17 @@ static struct bpf_prog *bpf_prepare_filter(struct bpf_prog *fp)
return ERR_PTR(err);
}
+ /* There might be additional checks and transformations
+ * needed on classic filters, f.e. in case of seccomp.
+ */
+ if (trans) {
+ err = trans(fp->insns, fp->len);
+ if (err) {
+ __bpf_prog_release(fp);
+ return ERR_PTR(err);
+ }
+ }
+
/* Probe if we can JIT compile the filter and if so, do
* the compilation of the filter.
*/
@@ -1050,7 +1066,7 @@ int bpf_prog_create(struct bpf_prog **pfp, struct sock_fprog_kern *fprog)
/* bpf_prepare_filter() already takes care of freeing
* memory in case something goes wrong.
*/
- fp = bpf_prepare_filter(fp);
+ fp = bpf_prepare_filter(fp, NULL);
if (IS_ERR(fp))
return PTR_ERR(fp);
@@ -1059,6 +1075,53 @@ int bpf_prog_create(struct bpf_prog **pfp, struct sock_fprog_kern *fprog)
}
EXPORT_SYMBOL_GPL(bpf_prog_create);
+/**
+ * bpf_prog_create_from_user - create an unattached filter from user buffer
+ * @pfp: the unattached filter that is created
+ * @fprog: the filter program
+ * @trans: post-classic verifier transformation handler
+ *
+ * This function effectively does the same as bpf_prog_create(), only
+ * that it builds up its insns buffer from user space provided buffer.
+ * It also allows for passing a bpf_aux_classic_check_t handler.
+ */
+int bpf_prog_create_from_user(struct bpf_prog **pfp, struct sock_fprog *fprog,
+ bpf_aux_classic_check_t trans)
+{
+ unsigned int fsize = bpf_classic_proglen(fprog);
+ struct bpf_prog *fp;
+
+ /* Make sure new filter is there and in the right amounts. */
+ if (fprog->filter == NULL)
+ return -EINVAL;
+
+ fp = bpf_prog_alloc(bpf_prog_size(fprog->len), 0);
+ if (!fp)
+ return -ENOMEM;
+
+ if (copy_from_user(fp->insns, fprog->filter, fsize)) {
+ __bpf_prog_free(fp);
+ return -EFAULT;
+ }
+
+ fp->len = fprog->len;
+ /* Since unattached filters are not copied back to user
+ * space through sk_get_filter(), we do not need to hold
+ * a copy here, and can spare us the work.
+ */
+ fp->orig_prog = NULL;
+
+ /* bpf_prepare_filter() already takes care of freeing
+ * memory in case something goes wrong.
+ */
+ fp = bpf_prepare_filter(fp, trans);
+ if (IS_ERR(fp))
+ return PTR_ERR(fp);
+
+ *pfp = fp;
+ return 0;
+}
+
void bpf_prog_destroy(struct bpf_prog *fp)
{
__bpf_prog_release(fp);
@@ -1135,7 +1198,7 @@ int sk_attach_filter(struct sock_fprog *fprog, struct sock *sk)
/* bpf_prepare_filter() already takes care of freeing
* memory in case something goes wrong.
*/
- prog = bpf_prepare_filter(prog);
+ prog = bpf_prepare_filter(prog, NULL);
if (IS_ERR(prog))
return PTR_ERR(prog);
@@ -1358,6 +1421,8 @@ sk_filter_func_proto(enum bpf_func_id func_id)
return &bpf_get_prandom_u32_proto;
case BPF_FUNC_get_smp_processor_id:
return &bpf_get_smp_processor_id_proto;
+ case BPF_FUNC_tail_call:
+ return &bpf_tail_call_proto;
default:
return NULL;
}
diff --git a/net/core/flow_dissector.c b/net/core/flow_dissector.c
index 2c35c02a931e..1f2d89300b1a 100644
--- a/net/core/flow_dissector.c
+++ b/net/core/flow_dissector.c
@@ -1,3 +1,4 @@
+#include <linux/kernel.h>
#include <linux/skbuff.h>
#include <linux/export.h>
#include <linux/ip.h>
@@ -12,19 +13,57 @@
#include <linux/if_tunnel.h>
#include <linux/if_pppox.h>
#include <linux/ppp_defs.h>
-#include <net/flow_keys.h>
+#include <linux/stddef.h>
+#include <linux/if_ether.h>
+#include <net/flow_dissector.h>
#include <scsi/fc/fc_fcoe.h>
-/* copy saddr & daddr, possibly using 64bit load/store
- * Equivalent to : flow->src = iph->saddr;
- * flow->dst = iph->daddr;
- */
-static void iph_to_flow_copy_addrs(struct flow_keys *flow, const struct iphdr *iph)
+static bool skb_flow_dissector_uses_key(struct flow_dissector *flow_dissector,
+ enum flow_dissector_key_id key_id)
+{
+ return flow_dissector->used_keys & (1 << key_id);
+}
+
+static void skb_flow_dissector_set_key(struct flow_dissector *flow_dissector,
+ enum flow_dissector_key_id key_id)
+{
+ flow_dissector->used_keys |= (1 << key_id);
+}
+
+static void *skb_flow_dissector_target(struct flow_dissector *flow_dissector,
+ enum flow_dissector_key_id key_id,
+ void *target_container)
+{
+ return ((char *) target_container) + flow_dissector->offset[key_id];
+}
+
+void skb_flow_dissector_init(struct flow_dissector *flow_dissector,
+ const struct flow_dissector_key *key,
+ unsigned int key_count)
{
- BUILD_BUG_ON(offsetof(typeof(*flow), dst) !=
- offsetof(typeof(*flow), src) + sizeof(flow->src));
- memcpy(&flow->src, &iph->saddr, sizeof(flow->src) + sizeof(flow->dst));
+ unsigned int i;
+
+ memset(flow_dissector, 0, sizeof(*flow_dissector));
+
+ for (i = 0; i < key_count; i++, key++) {
+ /* User should make sure that every key target offset is withing
+ * boundaries of unsigned short.
+ */
+ BUG_ON(key->offset > USHRT_MAX);
+ BUG_ON(skb_flow_dissector_uses_key(flow_dissector,
+ key->key_id));
+
+ skb_flow_dissector_set_key(flow_dissector, key->key_id);
+ flow_dissector->offset[key->key_id] = key->offset;
+ }
+
+ /* Ensure that the dissector always includes basic key. That way
+ * we are able to avoid handling lack of it in fast path.
+ */
+ BUG_ON(!skb_flow_dissector_uses_key(flow_dissector,
+ FLOW_DISSECTOR_KEY_BASIC));
}
+EXPORT_SYMBOL(skb_flow_dissector_init);
/**
* __skb_flow_get_ports - extract the upper layer ports and return them
@@ -63,17 +102,27 @@ EXPORT_SYMBOL(__skb_flow_get_ports);
/**
* __skb_flow_dissect - extract the flow_keys struct and return it
* @skb: sk_buff to extract the flow from, can be NULL if the rest are specified
+ * @flow_dissector: list of keys to dissect
+ * @target_container: target structure to put dissected values into
* @data: raw buffer pointer to the packet, if NULL use skb->data
* @proto: protocol for which to get the flow, if @data is NULL use skb->protocol
* @nhoff: network header offset, if @data is NULL use skb_network_offset(skb)
* @hlen: packet header length, if @data is NULL use skb_headlen(skb)
*
- * The function will try to retrieve the struct flow_keys from either the skbuff
- * or a raw buffer specified by the rest parameters
+ * The function will try to retrieve individual keys into target specified
+ * by flow_dissector from either the skbuff or a raw buffer specified by the
+ * rest parameters.
+ *
+ * Caller must take care of zeroing target container memory.
*/
-bool __skb_flow_dissect(const struct sk_buff *skb, struct flow_keys *flow,
+bool __skb_flow_dissect(const struct sk_buff *skb,
+ struct flow_dissector *flow_dissector,
+ void *target_container,
void *data, __be16 proto, int nhoff, int hlen)
{
+ struct flow_dissector_key_basic *key_basic;
+ struct flow_dissector_key_addrs *key_addrs;
+ struct flow_dissector_key_ports *key_ports;
u8 ip_proto;
if (!data) {
@@ -83,7 +132,23 @@ bool __skb_flow_dissect(const struct sk_buff *skb, struct flow_keys *flow,
hlen = skb_headlen(skb);
}
- memset(flow, 0, sizeof(*flow));
+ /* It is ensured by skb_flow_dissector_init() that basic key will
+ * be always present.
+ */
+ key_basic = skb_flow_dissector_target(flow_dissector,
+ FLOW_DISSECTOR_KEY_BASIC,
+ target_container);
+
+ if (skb_flow_dissector_uses_key(flow_dissector,
+ FLOW_DISSECTOR_KEY_ETH_ADDRS)) {
+ struct ethhdr *eth = eth_hdr(skb);
+ struct flow_dissector_key_eth_addrs *key_eth_addrs;
+
+ key_eth_addrs = skb_flow_dissector_target(flow_dissector,
+ FLOW_DISSECTOR_KEY_ETH_ADDRS,
+ target_container);
+ memcpy(key_eth_addrs, &eth->h_dest, sizeof(*key_eth_addrs));
+ }
again:
switch (proto) {
@@ -100,14 +165,13 @@ ip:
if (ip_is_fragment(iph))
ip_proto = 0;
- /* skip the address processing if skb is NULL. The assumption
- * here is that if there is no skb we are not looking for flow
- * info but lengths and protocols.
- */
- if (!skb)
+ if (!skb_flow_dissector_uses_key(flow_dissector,
+ FLOW_DISSECTOR_KEY_IPV4_ADDRS))
break;
-
- iph_to_flow_copy_addrs(flow, iph);
+ key_addrs = skb_flow_dissector_target(flow_dissector,
+ FLOW_DISSECTOR_KEY_IPV4_ADDRS,
+ target_container);
+ memcpy(key_addrs, &iph->saddr, sizeof(*key_addrs));
break;
}
case htons(ETH_P_IPV6): {
@@ -123,23 +187,47 @@ ipv6:
ip_proto = iph->nexthdr;
nhoff += sizeof(struct ipv6hdr);
- /* see comment above in IPv4 section */
- if (!skb)
- break;
+ if (skb_flow_dissector_uses_key(flow_dissector,
+ FLOW_DISSECTOR_KEY_IPV6_HASH_ADDRS)) {
+ key_addrs = skb_flow_dissector_target(flow_dissector,
+ FLOW_DISSECTOR_KEY_IPV6_HASH_ADDRS,
+ target_container);
+
+ key_addrs->src = (__force __be32)ipv6_addr_hash(&iph->saddr);
+ key_addrs->dst = (__force __be32)ipv6_addr_hash(&iph->daddr);
+ goto flow_label;
+ }
+ if (skb_flow_dissector_uses_key(flow_dissector,
+ FLOW_DISSECTOR_KEY_IPV6_ADDRS)) {
+ struct flow_dissector_key_ipv6_addrs *key_ipv6_addrs;
- flow->src = (__force __be32)ipv6_addr_hash(&iph->saddr);
- flow->dst = (__force __be32)ipv6_addr_hash(&iph->daddr);
+ key_ipv6_addrs = skb_flow_dissector_target(flow_dissector,
+ FLOW_DISSECTOR_KEY_IPV6_ADDRS,
+ target_container);
+ memcpy(key_ipv6_addrs, &iph->saddr, sizeof(*key_ipv6_addrs));
+ goto flow_label;
+ }
+ break;
+flow_label:
flow_label = ip6_flowlabel(iph);
if (flow_label) {
/* Awesome, IPv6 packet has a flow label so we can
* use that to represent the ports without any
* further dissection.
*/
- flow->n_proto = proto;
- flow->ip_proto = ip_proto;
- flow->ports = flow_label;
- flow->thoff = (u16)nhoff;
+
+ key_basic->n_proto = proto;
+ key_basic->ip_proto = ip_proto;
+ key_basic->thoff = (u16)nhoff;
+
+ if (skb_flow_dissector_uses_key(flow_dissector,
+ FLOW_DISSECTOR_KEY_PORTS)) {
+ key_ports = skb_flow_dissector_target(flow_dissector,
+ FLOW_DISSECTOR_KEY_PORTS,
+ target_container);
+ key_ports->ports = flow_label;
+ }
return true;
}
@@ -186,14 +274,21 @@ ipv6:
hdr = __skb_header_pointer(skb, nhoff, sizeof(_hdr), data, hlen, &_hdr);
if (!hdr)
return false;
- flow->src = hdr->srcnode;
- flow->dst = 0;
- flow->n_proto = proto;
- flow->thoff = (u16)nhoff;
+ key_basic->n_proto = proto;
+ key_basic->thoff = (u16)nhoff;
+
+ if (skb_flow_dissector_uses_key(flow_dissector,
+ FLOW_DISSECTOR_KEY_IPV6_HASH_ADDRS)) {
+ key_addrs = skb_flow_dissector_target(flow_dissector,
+ FLOW_DISSECTOR_KEY_IPV6_HASH_ADDRS,
+ target_container);
+ key_addrs->src = hdr->srcnode;
+ key_addrs->dst = 0;
+ }
return true;
}
case htons(ETH_P_FCOE):
- flow->thoff = (u16)(nhoff + FCOE_HEADER_LEN);
+ key_basic->thoff = (u16)(nhoff + FCOE_HEADER_LEN);
/* fall through */
default:
return false;
@@ -248,14 +343,24 @@ ipv6:
break;
}
- flow->n_proto = proto;
- flow->ip_proto = ip_proto;
- flow->thoff = (u16) nhoff;
-
- /* unless skb is set we don't need to record port info */
- if (skb)
- flow->ports = __skb_flow_get_ports(skb, nhoff, ip_proto,
- data, hlen);
+ /* It is ensured by skb_flow_dissector_init() that basic key will
+ * be always present.
+ */
+ key_basic = skb_flow_dissector_target(flow_dissector,
+ FLOW_DISSECTOR_KEY_BASIC,
+ target_container);
+ key_basic->n_proto = proto;
+ key_basic->ip_proto = ip_proto;
+ key_basic->thoff = (u16) nhoff;
+
+ if (skb_flow_dissector_uses_key(flow_dissector,
+ FLOW_DISSECTOR_KEY_PORTS)) {
+ key_ports = skb_flow_dissector_target(flow_dissector,
+ FLOW_DISSECTOR_KEY_PORTS,
+ target_container);
+ key_ports->ports = __skb_flow_get_ports(skb, nhoff, ip_proto,
+ data, hlen);
+ }
return true;
}
@@ -267,27 +372,27 @@ static __always_inline void __flow_hash_secret_init(void)
net_get_random_once(&hashrnd, sizeof(hashrnd));
}
-static __always_inline u32 __flow_hash_3words(u32 a, u32 b, u32 c)
+static __always_inline u32 __flow_hash_3words(u32 a, u32 b, u32 c, u32 keyval)
{
- __flow_hash_secret_init();
- return jhash_3words(a, b, c, hashrnd);
+ return jhash_3words(a, b, c, keyval);
}
-static inline u32 __flow_hash_from_keys(struct flow_keys *keys)
+static inline u32 __flow_hash_from_keys(struct flow_keys *keys, u32 keyval)
{
u32 hash;
/* get a consistent hash (same value on both flow directions) */
- if (((__force u32)keys->dst < (__force u32)keys->src) ||
- (((__force u32)keys->dst == (__force u32)keys->src) &&
- ((__force u16)keys->port16[1] < (__force u16)keys->port16[0]))) {
- swap(keys->dst, keys->src);
- swap(keys->port16[0], keys->port16[1]);
+ if (((__force u32)keys->addrs.dst < (__force u32)keys->addrs.src) ||
+ (((__force u32)keys->addrs.dst == (__force u32)keys->addrs.src) &&
+ ((__force u16)keys->ports.dst < (__force u16)keys->ports.src))) {
+ swap(keys->addrs.dst, keys->addrs.src);
+ swap(keys->ports.src, keys->ports.dst);
}
- hash = __flow_hash_3words((__force u32)keys->dst,
- (__force u32)keys->src,
- (__force u32)keys->ports);
+ hash = __flow_hash_3words((__force u32)keys->addrs.dst,
+ (__force u32)keys->addrs.src,
+ (__force u32)keys->ports.ports,
+ keyval);
if (!hash)
hash = 1;
@@ -296,12 +401,52 @@ static inline u32 __flow_hash_from_keys(struct flow_keys *keys)
u32 flow_hash_from_keys(struct flow_keys *keys)
{
- return __flow_hash_from_keys(keys);
+ __flow_hash_secret_init();
+ return __flow_hash_from_keys(keys, hashrnd);
}
EXPORT_SYMBOL(flow_hash_from_keys);
-/*
- * __skb_get_hash: calculate a flow hash based on src/dst addresses
+static inline u32 ___skb_get_hash(const struct sk_buff *skb,
+ struct flow_keys *keys, u32 keyval)
+{
+ if (!skb_flow_dissect_flow_keys(skb, keys))
+ return 0;
+
+ return __flow_hash_from_keys(keys, keyval);
+}
+
+struct _flow_keys_digest_data {
+ __be16 n_proto;
+ u8 ip_proto;
+ u8 padding;
+ __be32 ports;
+ __be32 src;
+ __be32 dst;
+};
+
+void make_flow_keys_digest(struct flow_keys_digest *digest,
+ const struct flow_keys *flow)
+{
+ struct _flow_keys_digest_data *data =
+ (struct _flow_keys_digest_data *)digest;
+
+ BUILD_BUG_ON(sizeof(*data) > sizeof(*digest));
+
+ memset(digest, 0, sizeof(*digest));
+
+ data->n_proto = flow->basic.n_proto;
+ data->ip_proto = flow->basic.ip_proto;
+ data->ports = flow->ports.ports;
+ data->src = flow->addrs.src;
+ data->dst = flow->addrs.dst;
+}
+EXPORT_SYMBOL(make_flow_keys_digest);
+
+/**
+ * __skb_get_hash: calculate a flow hash
+ * @skb: sk_buff to calculate flow hash from
+ *
+ * This function calculates a flow hash based on src/dst addresses
* and src/dst port numbers. Sets hash in skb to non-zero hash value
* on success, zero indicates no valid hash. Also, sets l4_hash in skb
* if hash is a canonical 4-tuple hash over transport ports.
@@ -309,53 +454,34 @@ EXPORT_SYMBOL(flow_hash_from_keys);
void __skb_get_hash(struct sk_buff *skb)
{
struct flow_keys keys;
+ u32 hash;
- if (!skb_flow_dissect(skb, &keys))
- return;
+ __flow_hash_secret_init();
- if (keys.ports)
+ hash = ___skb_get_hash(skb, &keys, hashrnd);
+ if (!hash)
+ return;
+ if (keys.ports.ports)
skb->l4_hash = 1;
-
skb->sw_hash = 1;
-
- skb->hash = __flow_hash_from_keys(&keys);
+ skb->hash = hash;
}
EXPORT_SYMBOL(__skb_get_hash);
-/*
- * Returns a Tx hash based on the given packet descriptor a Tx queues' number
- * to be used as a distribution range.
- */
-u16 __skb_tx_hash(const struct net_device *dev, struct sk_buff *skb,
- unsigned int num_tx_queues)
+__u32 skb_get_hash_perturb(const struct sk_buff *skb, u32 perturb)
{
- u32 hash;
- u16 qoffset = 0;
- u16 qcount = num_tx_queues;
-
- if (skb_rx_queue_recorded(skb)) {
- hash = skb_get_rx_queue(skb);
- while (unlikely(hash >= num_tx_queues))
- hash -= num_tx_queues;
- return hash;
- }
-
- if (dev->num_tc) {
- u8 tc = netdev_get_prio_tc_map(dev, skb->priority);
- qoffset = dev->tc_to_txq[tc].offset;
- qcount = dev->tc_to_txq[tc].count;
- }
+ struct flow_keys keys;
- return (u16) reciprocal_scale(skb_get_hash(skb), qcount) + qoffset;
+ return ___skb_get_hash(skb, &keys, perturb);
}
-EXPORT_SYMBOL(__skb_tx_hash);
+EXPORT_SYMBOL(skb_get_hash_perturb);
u32 __skb_get_poff(const struct sk_buff *skb, void *data,
const struct flow_keys *keys, int hlen)
{
- u32 poff = keys->thoff;
+ u32 poff = keys->basic.thoff;
- switch (keys->ip_proto) {
+ switch (keys->basic.ip_proto) {
case IPPROTO_TCP: {
/* access doff as u8 to avoid unaligned access */
const u8 *doff;
@@ -396,8 +522,12 @@ u32 __skb_get_poff(const struct sk_buff *skb, void *data,
return poff;
}
-/* skb_get_poff() returns the offset to the payload as far as it could
- * be dissected. The main user is currently BPF, so that we can dynamically
+/**
+ * skb_get_poff - get the offset to the payload
+ * @skb: sk_buff to get the payload offset from
+ *
+ * The function will get the offset to the payload as far as it could
+ * be dissected. The main user is currently BPF, so that we can dynamically
* truncate packets without needing to push actual payload to the user
* space and can analyze headers only, instead.
*/
@@ -405,86 +535,52 @@ u32 skb_get_poff(const struct sk_buff *skb)
{
struct flow_keys keys;
- if (!skb_flow_dissect(skb, &keys))
+ if (!skb_flow_dissect_flow_keys(skb, &keys))
return 0;
return __skb_get_poff(skb, skb->data, &keys, skb_headlen(skb));
}
-static inline int get_xps_queue(struct net_device *dev, struct sk_buff *skb)
-{
-#ifdef CONFIG_XPS
- struct xps_dev_maps *dev_maps;
- struct xps_map *map;
- int queue_index = -1;
-
- rcu_read_lock();
- dev_maps = rcu_dereference(dev->xps_maps);
- if (dev_maps) {
- map = rcu_dereference(
- dev_maps->cpu_map[skb->sender_cpu - 1]);
- if (map) {
- if (map->len == 1)
- queue_index = map->queues[0];
- else
- queue_index = map->queues[reciprocal_scale(skb_get_hash(skb),
- map->len)];
- if (unlikely(queue_index >= dev->real_num_tx_queues))
- queue_index = -1;
- }
- }
- rcu_read_unlock();
-
- return queue_index;
-#else
- return -1;
-#endif
-}
-
-static u16 __netdev_pick_tx(struct net_device *dev, struct sk_buff *skb)
+static const struct flow_dissector_key flow_keys_dissector_keys[] = {
+ {
+ .key_id = FLOW_DISSECTOR_KEY_BASIC,
+ .offset = offsetof(struct flow_keys, basic),
+ },
+ {
+ .key_id = FLOW_DISSECTOR_KEY_IPV4_ADDRS,
+ .offset = offsetof(struct flow_keys, addrs),
+ },
+ {
+ .key_id = FLOW_DISSECTOR_KEY_IPV6_HASH_ADDRS,
+ .offset = offsetof(struct flow_keys, addrs),
+ },
+ {
+ .key_id = FLOW_DISSECTOR_KEY_PORTS,
+ .offset = offsetof(struct flow_keys, ports),
+ },
+};
+
+static const struct flow_dissector_key flow_keys_buf_dissector_keys[] = {
+ {
+ .key_id = FLOW_DISSECTOR_KEY_BASIC,
+ .offset = offsetof(struct flow_keys, basic),
+ },
+};
+
+struct flow_dissector flow_keys_dissector __read_mostly;
+EXPORT_SYMBOL(flow_keys_dissector);
+
+struct flow_dissector flow_keys_buf_dissector __read_mostly;
+
+static int __init init_default_flow_dissectors(void)
{
- struct sock *sk = skb->sk;
- int queue_index = sk_tx_queue_get(sk);
-
- if (queue_index < 0 || skb->ooo_okay ||
- queue_index >= dev->real_num_tx_queues) {
- int new_index = get_xps_queue(dev, skb);
- if (new_index < 0)
- new_index = skb_tx_hash(dev, skb);
-
- if (queue_index != new_index && sk &&
- rcu_access_pointer(sk->sk_dst_cache))
- sk_tx_queue_set(sk, new_index);
-
- queue_index = new_index;
- }
-
- return queue_index;
+ skb_flow_dissector_init(&flow_keys_dissector,
+ flow_keys_dissector_keys,
+ ARRAY_SIZE(flow_keys_dissector_keys));
+ skb_flow_dissector_init(&flow_keys_buf_dissector,
+ flow_keys_buf_dissector_keys,
+ ARRAY_SIZE(flow_keys_buf_dissector_keys));
+ return 0;
}
-struct netdev_queue *netdev_pick_tx(struct net_device *dev,
- struct sk_buff *skb,
- void *accel_priv)
-{
- int queue_index = 0;
-
-#ifdef CONFIG_XPS
- if (skb->sender_cpu == 0)
- skb->sender_cpu = raw_smp_processor_id() + 1;
-#endif
-
- if (dev->real_num_tx_queues != 1) {
- const struct net_device_ops *ops = dev->netdev_ops;
- if (ops->ndo_select_queue)
- queue_index = ops->ndo_select_queue(dev, skb, accel_priv,
- __netdev_pick_tx);
- else
- queue_index = __netdev_pick_tx(dev, skb);
-
- if (!accel_priv)
- queue_index = netdev_cap_txqueue(dev, queue_index);
- }
-
- skb_set_queue_mapping(skb, queue_index);
- return netdev_get_tx_queue(dev, queue_index);
-}
+late_initcall_sync(init_default_flow_dissectors);
diff --git a/net/core/neighbour.c b/net/core/neighbour.c
index 3de654256028..3a74df750af4 100644
--- a/net/core/neighbour.c
+++ b/net/core/neighbour.c
@@ -913,6 +913,7 @@ static void neigh_timer_handler(unsigned long arg)
neigh->nud_state = NUD_PROBE;
neigh->updated = jiffies;
atomic_set(&neigh->probes, 0);
+ notify = 1;
next = now + NEIGH_VAR(neigh->parms, RETRANS_TIME);
}
} else {
@@ -1144,6 +1145,8 @@ int neigh_update(struct neighbour *neigh, const u8 *lladdr, u8 new,
if (new != old) {
neigh_del_timer(neigh);
+ if (new & NUD_PROBE)
+ atomic_set(&neigh->probes, 0);
if (new & NUD_IN_TIMER)
neigh_add_timer(neigh, (jiffies +
((new & NUD_REACHABLE) ?
diff --git a/net/core/net-sysfs.c b/net/core/net-sysfs.c
index 4238d6da5c60..18b34d771ed4 100644
--- a/net/core/net-sysfs.c
+++ b/net/core/net-sysfs.c
@@ -458,11 +458,15 @@ static ssize_t phys_switch_id_show(struct device *dev,
return restart_syscall();
if (dev_isalive(netdev)) {
- struct netdev_phys_item_id ppid;
+ struct switchdev_attr attr = {
+ .id = SWITCHDEV_ATTR_PORT_PARENT_ID,
+ .flags = SWITCHDEV_F_NO_RECURSE,
+ };
- ret = netdev_switch_parent_id_get(netdev, &ppid);
+ ret = switchdev_port_attr_get(netdev, &attr);
if (!ret)
- ret = sprintf(buf, "%*phN\n", ppid.id_len, ppid.id);
+ ret = sprintf(buf, "%*phN\n", attr.u.ppid.id_len,
+ attr.u.ppid.id);
}
rtnl_unlock();
diff --git a/net/core/net_namespace.c b/net/core/net_namespace.c
index 78fc04ad36fc..2c2eb1b629b1 100644
--- a/net/core/net_namespace.c
+++ b/net/core/net_namespace.c
@@ -147,24 +147,17 @@ static void ops_free_list(const struct pernet_operations *ops,
}
}
-static void rtnl_net_notifyid(struct net *net, struct net *peer, int cmd,
- int id);
+/* should be called with nsid_lock held */
static int alloc_netid(struct net *net, struct net *peer, int reqid)
{
- int min = 0, max = 0, id;
-
- ASSERT_RTNL();
+ int min = 0, max = 0;
if (reqid >= 0) {
min = reqid;
max = reqid + 1;
}
- id = idr_alloc(&net->netns_ids, peer, min, max, GFP_KERNEL);
- if (id >= 0)
- rtnl_net_notifyid(net, peer, RTM_NEWNSID, id);
-
- return id;
+ return idr_alloc(&net->netns_ids, peer, min, max, GFP_ATOMIC);
}
/* This function is used by idr_for_each(). If net is equal to peer, the
@@ -180,11 +173,16 @@ static int net_eq_idr(int id, void *net, void *peer)
return 0;
}
-static int __peernet2id(struct net *net, struct net *peer, bool alloc)
+/* Should be called with nsid_lock held. If a new id is assigned, the bool alloc
+ * is set to true, thus the caller knows that the new id must be notified via
+ * rtnl.
+ */
+static int __peernet2id_alloc(struct net *net, struct net *peer, bool *alloc)
{
int id = idr_for_each(&net->netns_ids, net_eq_idr, peer);
+ bool alloc_it = *alloc;
- ASSERT_RTNL();
+ *alloc = false;
/* Magic value for id 0. */
if (id == NET_ID_ZERO)
@@ -192,36 +190,77 @@ static int __peernet2id(struct net *net, struct net *peer, bool alloc)
if (id > 0)
return id;
- if (alloc)
- return alloc_netid(net, peer, -1);
+ if (alloc_it) {
+ id = alloc_netid(net, peer, -1);
+ *alloc = true;
+ return id >= 0 ? id : NETNSA_NSID_NOT_ASSIGNED;
+ }
+
+ return NETNSA_NSID_NOT_ASSIGNED;
+}
+
+/* should be called with nsid_lock held */
+static int __peernet2id(struct net *net, struct net *peer)
+{
+ bool no = false;
- return -ENOENT;
+ return __peernet2id_alloc(net, peer, &no);
}
+static void rtnl_net_notifyid(struct net *net, int cmd, int id);
/* This function returns the id of a peer netns. If no id is assigned, one will
* be allocated and returned.
*/
+int peernet2id_alloc(struct net *net, struct net *peer)
+{
+ unsigned long flags;
+ bool alloc;
+ int id;
+
+ spin_lock_irqsave(&net->nsid_lock, flags);
+ alloc = atomic_read(&peer->count) == 0 ? false : true;
+ id = __peernet2id_alloc(net, peer, &alloc);
+ spin_unlock_irqrestore(&net->nsid_lock, flags);
+ if (alloc && id >= 0)
+ rtnl_net_notifyid(net, RTM_NEWNSID, id);
+ return id;
+}
+EXPORT_SYMBOL(peernet2id_alloc);
+
+/* This function returns, if assigned, the id of a peer netns. */
int peernet2id(struct net *net, struct net *peer)
{
- bool alloc = atomic_read(&peer->count) == 0 ? false : true;
+ unsigned long flags;
int id;
- id = __peernet2id(net, peer, alloc);
- return id >= 0 ? id : NETNSA_NSID_NOT_ASSIGNED;
+ spin_lock_irqsave(&net->nsid_lock, flags);
+ id = __peernet2id(net, peer);
+ spin_unlock_irqrestore(&net->nsid_lock, flags);
+ return id;
+}
+
+/* This function returns true is the peer netns has an id assigned into the
+ * current netns.
+ */
+bool peernet_has_id(struct net *net, struct net *peer)
+{
+ return peernet2id(net, peer) >= 0;
}
-EXPORT_SYMBOL(peernet2id);
struct net *get_net_ns_by_id(struct net *net, int id)
{
+ unsigned long flags;
struct net *peer;
if (id < 0)
return NULL;
rcu_read_lock();
+ spin_lock_irqsave(&net->nsid_lock, flags);
peer = idr_find(&net->netns_ids, id);
if (peer)
get_net(peer);
+ spin_unlock_irqrestore(&net->nsid_lock, flags);
rcu_read_unlock();
return peer;
@@ -242,6 +281,7 @@ static __net_init int setup_net(struct net *net, struct user_namespace *user_ns)
net->dev_base_seq = 1;
net->user_ns = user_ns;
idr_init(&net->netns_ids);
+ spin_lock_init(&net->nsid_lock);
list_for_each_entry(ops, &pernet_list, list) {
error = ops_init(ops, net);
@@ -362,14 +402,19 @@ static void cleanup_net(struct work_struct *work)
list_del_rcu(&net->list);
list_add_tail(&net->exit_list, &net_exit_list);
for_each_net(tmp) {
- int id = __peernet2id(tmp, net, false);
+ int id;
- if (id >= 0) {
- rtnl_net_notifyid(tmp, net, RTM_DELNSID, id);
+ spin_lock_irq(&tmp->nsid_lock);
+ id = __peernet2id(tmp, net);
+ if (id >= 0)
idr_remove(&tmp->netns_ids, id);
- }
+ spin_unlock_irq(&tmp->nsid_lock);
+ if (id >= 0)
+ rtnl_net_notifyid(tmp, RTM_DELNSID, id);
}
+ spin_lock_irq(&net->nsid_lock);
idr_destroy(&net->netns_ids);
+ spin_unlock_irq(&net->nsid_lock);
}
rtnl_unlock();
@@ -497,6 +542,7 @@ static int rtnl_net_newid(struct sk_buff *skb, struct nlmsghdr *nlh)
{
struct net *net = sock_net(skb->sk);
struct nlattr *tb[NETNSA_MAX + 1];
+ unsigned long flags;
struct net *peer;
int nsid, err;
@@ -517,14 +563,19 @@ static int rtnl_net_newid(struct sk_buff *skb, struct nlmsghdr *nlh)
if (IS_ERR(peer))
return PTR_ERR(peer);
- if (__peernet2id(net, peer, false) >= 0) {
+ spin_lock_irqsave(&net->nsid_lock, flags);
+ if (__peernet2id(net, peer) >= 0) {
+ spin_unlock_irqrestore(&net->nsid_lock, flags);
err = -EEXIST;
goto out;
}
err = alloc_netid(net, peer, nsid);
- if (err > 0)
+ spin_unlock_irqrestore(&net->nsid_lock, flags);
+ if (err >= 0) {
+ rtnl_net_notifyid(net, RTM_NEWNSID, err);
err = 0;
+ }
out:
put_net(peer);
return err;
@@ -538,14 +589,10 @@ static int rtnl_net_get_size(void)
}
static int rtnl_net_fill(struct sk_buff *skb, u32 portid, u32 seq, int flags,
- int cmd, struct net *net, struct net *peer,
- int nsid)
+ int cmd, struct net *net, int nsid)
{
struct nlmsghdr *nlh;
struct rtgenmsg *rth;
- int id;
-
- ASSERT_RTNL();
nlh = nlmsg_put(skb, portid, seq, cmd, sizeof(*rth), flags);
if (!nlh)
@@ -554,14 +601,7 @@ static int rtnl_net_fill(struct sk_buff *skb, u32 portid, u32 seq, int flags,
rth = nlmsg_data(nlh);
rth->rtgen_family = AF_UNSPEC;
- if (nsid >= 0) {
- id = nsid;
- } else {
- id = __peernet2id(net, peer, false);
- if (id < 0)
- id = NETNSA_NSID_NOT_ASSIGNED;
- }
- if (nla_put_s32(skb, NETNSA_NSID, id))
+ if (nla_put_s32(skb, NETNSA_NSID, nsid))
goto nla_put_failure;
nlmsg_end(skb, nlh);
@@ -578,7 +618,7 @@ static int rtnl_net_getid(struct sk_buff *skb, struct nlmsghdr *nlh)
struct nlattr *tb[NETNSA_MAX + 1];
struct sk_buff *msg;
struct net *peer;
- int err;
+ int err, id;
err = nlmsg_parse(nlh, sizeof(struct rtgenmsg), tb, NETNSA_MAX,
rtnl_net_policy);
@@ -600,8 +640,9 @@ static int rtnl_net_getid(struct sk_buff *skb, struct nlmsghdr *nlh)
goto out;
}
+ id = peernet2id(net, peer);
err = rtnl_net_fill(msg, NETLINK_CB(skb).portid, nlh->nlmsg_seq, 0,
- RTM_GETNSID, net, peer, -1);
+ RTM_NEWNSID, net, id);
if (err < 0)
goto err_out;
@@ -633,7 +674,7 @@ static int rtnl_net_dumpid_one(int id, void *peer, void *data)
ret = rtnl_net_fill(net_cb->skb, NETLINK_CB(net_cb->cb->skb).portid,
net_cb->cb->nlh->nlmsg_seq, NLM_F_MULTI,
- RTM_NEWNSID, net_cb->net, peer, id);
+ RTM_NEWNSID, net_cb->net, id);
if (ret < 0)
return ret;
@@ -652,17 +693,17 @@ static int rtnl_net_dumpid(struct sk_buff *skb, struct netlink_callback *cb)
.idx = 0,
.s_idx = cb->args[0],
};
+ unsigned long flags;
- ASSERT_RTNL();
-
+ spin_lock_irqsave(&net->nsid_lock, flags);
idr_for_each(&net->netns_ids, rtnl_net_dumpid_one, &net_cb);
+ spin_unlock_irqrestore(&net->nsid_lock, flags);
cb->args[0] = net_cb.idx;
return skb->len;
}
-static void rtnl_net_notifyid(struct net *net, struct net *peer, int cmd,
- int id)
+static void rtnl_net_notifyid(struct net *net, int cmd, int id)
{
struct sk_buff *msg;
int err = -ENOMEM;
@@ -671,7 +712,7 @@ static void rtnl_net_notifyid(struct net *net, struct net *peer, int cmd,
if (!msg)
goto out;
- err = rtnl_net_fill(msg, 0, 0, 0, cmd, net, peer, id);
+ err = rtnl_net_fill(msg, 0, 0, 0, cmd, net, id);
if (err < 0)
goto err_out;
diff --git a/net/core/pktgen.c b/net/core/pktgen.c
index 508155b283dd..676550f34560 100644
--- a/net/core/pktgen.c
+++ b/net/core/pktgen.c
@@ -177,7 +177,7 @@
#include <asm/dma.h>
#include <asm/div64.h> /* do_div */
-#define VERSION "2.74"
+#define VERSION "2.75"
#define IP_NAME_SZ 32
#define MAX_MPLS_LABELS 16 /* This is the max label stack depth */
#define MPLS_STACK_BOTTOM htonl(0x00000100)
@@ -210,6 +210,10 @@
#define T_REMDEVALL (1<<2) /* Remove all devs */
#define T_REMDEV (1<<3) /* Remove one dev */
+/* Xmit modes */
+#define M_START_XMIT 0 /* Default normal TX */
+#define M_NETIF_RECEIVE 1 /* Inject packets into stack */
+
/* If lock -- protects updating of if_list */
#define if_lock(t) spin_lock(&(t->if_lock));
#define if_unlock(t) spin_unlock(&(t->if_lock));
@@ -251,13 +255,14 @@ struct pktgen_dev {
* we will do a random selection from within the range.
*/
__u32 flags;
- int removal_mark; /* non-zero => the device is marked for
- * removal by worker thread */
-
+ int xmit_mode;
int min_pkt_size;
int max_pkt_size;
int pkt_overhead; /* overhead for MPLS, VLANs, IPSEC etc */
int nfrags;
+ int removal_mark; /* non-zero => the device is marked for
+ * removal by worker thread */
+
struct page *page;
u64 delay; /* nano-seconds */
@@ -507,7 +512,7 @@ static ssize_t pgctrl_write(struct file *file, const char __user *buf,
pktgen_reset_all_threads(pn);
else
- pr_warn("Unknown command: %s\n", data);
+ return -EINVAL;
return count;
}
@@ -567,7 +572,7 @@ static int pktgen_if_show(struct seq_file *seq, void *v)
" dst_min: %s dst_max: %s\n",
pkt_dev->dst_min, pkt_dev->dst_max);
seq_printf(seq,
- " src_min: %s src_max: %s\n",
+ " src_min: %s src_max: %s\n",
pkt_dev->src_min, pkt_dev->src_max);
}
@@ -620,6 +625,9 @@ static int pktgen_if_show(struct seq_file *seq, void *v)
if (pkt_dev->node >= 0)
seq_printf(seq, " node: %d\n", pkt_dev->node);
+ if (pkt_dev->xmit_mode == M_NETIF_RECEIVE)
+ seq_puts(seq, " xmit_mode: netif_receive\n");
+
seq_puts(seq, " Flags: ");
if (pkt_dev->flags & F_IPV6)
@@ -1081,7 +1089,8 @@ static ssize_t pktgen_if_write(struct file *file,
if (len < 0)
return len;
if ((value > 0) &&
- (!(pkt_dev->odev->priv_flags & IFF_TX_SKB_SHARING)))
+ ((pkt_dev->xmit_mode == M_NETIF_RECEIVE) ||
+ !(pkt_dev->odev->priv_flags & IFF_TX_SKB_SHARING)))
return -ENOTSUPP;
i += len;
pkt_dev->clone_skb = value;
@@ -1134,7 +1143,7 @@ static ssize_t pktgen_if_write(struct file *file,
return len;
i += len;
- if ((value > 1) &&
+ if ((value > 1) && (pkt_dev->xmit_mode == M_START_XMIT) &&
(!(pkt_dev->odev->priv_flags & IFF_TX_SKB_SHARING)))
return -ENOTSUPP;
pkt_dev->burst = value < 1 ? 1 : value;
@@ -1160,6 +1169,45 @@ static ssize_t pktgen_if_write(struct file *file,
sprintf(pg_result, "ERROR: node not possible");
return count;
}
+ if (!strcmp(name, "xmit_mode")) {
+ char f[32];
+
+ memset(f, 0, 32);
+ len = strn_len(&user_buffer[i], sizeof(f) - 1);
+ if (len < 0)
+ return len;
+
+ if (copy_from_user(f, &user_buffer[i], len))
+ return -EFAULT;
+ i += len;
+
+ if (strcmp(f, "start_xmit") == 0) {
+ pkt_dev->xmit_mode = M_START_XMIT;
+ } else if (strcmp(f, "netif_receive") == 0) {
+ /* clone_skb set earlier, not supported in this mode */
+ if (pkt_dev->clone_skb > 0)
+ return -ENOTSUPP;
+
+ pkt_dev->xmit_mode = M_NETIF_RECEIVE;
+
+ /* make sure new packet is allocated every time
+ * pktgen_xmit() is called
+ */
+ pkt_dev->last_ok = 1;
+
+ /* override clone_skb if user passed default value
+ * at module loading time
+ */
+ pkt_dev->clone_skb = 0;
+ } else {
+ sprintf(pg_result,
+ "xmit_mode -:%s:- unknown\nAvailable modes: %s",
+ f, "start_xmit, netif_receive\n");
+ return count;
+ }
+ sprintf(pg_result, "OK: xmit_mode=%s", f);
+ return count;
+ }
if (!strcmp(name, "flag")) {
char f[32];
memset(f, 0, 32);
@@ -1267,6 +1315,9 @@ static ssize_t pktgen_if_write(struct file *file,
else if (strcmp(f, "NO_TIMESTAMP") == 0)
pkt_dev->flags |= F_NO_TIMESTAMP;
+ else if (strcmp(f, "!NO_TIMESTAMP") == 0)
+ pkt_dev->flags &= ~F_NO_TIMESTAMP;
+
else {
sprintf(pg_result,
"Flag -:%s:- unknown\nAvailable flags, (prepend ! to un-set flag):\n%s",
@@ -3317,6 +3368,7 @@ static void pktgen_xmit(struct pktgen_dev *pkt_dev)
unsigned int burst = ACCESS_ONCE(pkt_dev->burst);
struct net_device *odev = pkt_dev->odev;
struct netdev_queue *txq;
+ struct sk_buff *skb;
int ret;
/* If device is offline, then don't send */
@@ -3354,6 +3406,37 @@ static void pktgen_xmit(struct pktgen_dev *pkt_dev)
if (pkt_dev->delay && pkt_dev->last_ok)
spin(pkt_dev, pkt_dev->next_tx);
+ if (pkt_dev->xmit_mode == M_NETIF_RECEIVE) {
+ skb = pkt_dev->skb;
+ skb->protocol = eth_type_trans(skb, skb->dev);
+ atomic_add(burst, &skb->users);
+ local_bh_disable();
+ do {
+ ret = netif_receive_skb(skb);
+ if (ret == NET_RX_DROP)
+ pkt_dev->errors++;
+ pkt_dev->sofar++;
+ pkt_dev->seq_num++;
+ if (atomic_read(&skb->users) != burst) {
+ /* skb was queued by rps/rfs or taps,
+ * so cannot reuse this skb
+ */
+ atomic_sub(burst - 1, &skb->users);
+ /* get out of the loop and wait
+ * until skb is consumed
+ */
+ break;
+ }
+ /* skb was 'freed' by stack, so clean few
+ * bits and reuse it
+ */
+#ifdef CONFIG_NET_CLS_ACT
+ skb->tc_verd = 0; /* reset reclass/redir ttl */
+#endif
+ } while (--burst > 0);
+ goto out; /* Skips xmit_mode M_START_XMIT */
+ }
+
txq = skb_get_tx_queue(odev, pkt_dev->skb);
local_bh_disable();
@@ -3401,6 +3484,7 @@ xmit_more:
unlock:
HARD_TX_UNLOCK(odev, txq);
+out:
local_bh_enable();
/* If pkt_dev->count is zero, then run forever */
diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c
index 358d52a38533..077b6d280371 100644
--- a/net/core/rtnetlink.c
+++ b/net/core/rtnetlink.c
@@ -1004,16 +1004,20 @@ static int rtnl_phys_port_name_fill(struct sk_buff *skb, struct net_device *dev)
static int rtnl_phys_switch_id_fill(struct sk_buff *skb, struct net_device *dev)
{
int err;
- struct netdev_phys_item_id psid;
+ struct switchdev_attr attr = {
+ .id = SWITCHDEV_ATTR_PORT_PARENT_ID,
+ .flags = SWITCHDEV_F_NO_RECURSE,
+ };
- err = netdev_switch_parent_id_get(dev, &psid);
+ err = switchdev_port_attr_get(dev, &attr);
if (err) {
if (err == -EOPNOTSUPP)
return 0;
return err;
}
- if (nla_put(skb, IFLA_PHYS_SWITCH_ID, psid.id_len, psid.id))
+ if (nla_put(skb, IFLA_PHYS_SWITCH_ID, attr.u.ppid.id_len,
+ attr.u.ppid.id))
return -EMSGSIZE;
return 0;
@@ -1204,7 +1208,7 @@ static int rtnl_fill_ifinfo(struct sk_buff *skb, struct net_device *dev,
struct net *link_net = dev->rtnl_link_ops->get_link_net(dev);
if (!net_eq(dev_net(dev), link_net)) {
- int id = peernet2id(dev_net(dev), link_net);
+ int id = peernet2id_alloc(dev_net(dev), link_net);
if (nla_put_s32(skb, IFLA_LINK_NETNSID, id))
goto nla_put_failure;
@@ -2416,6 +2420,9 @@ void rtmsg_ifinfo(int type, struct net_device *dev, unsigned int change,
{
struct sk_buff *skb;
+ if (dev->reg_state != NETREG_REGISTERED)
+ return;
+
skb = rtmsg_ifinfo_build_skb(type, dev, change, flags);
if (skb)
rtmsg_ifinfo_send(skb, dev, flags);
@@ -2854,7 +2861,7 @@ static int brport_nla_put_flag(struct sk_buff *skb, u32 flags, u32 mask,
int ndo_dflt_bridge_getlink(struct sk_buff *skb, u32 pid, u32 seq,
struct net_device *dev, u16 mode,
- u32 flags, u32 mask)
+ u32 flags, u32 mask, int nlflags)
{
struct nlmsghdr *nlh;
struct ifinfomsg *ifm;
@@ -2863,7 +2870,7 @@ int ndo_dflt_bridge_getlink(struct sk_buff *skb, u32 pid, u32 seq,
u8 operstate = netif_running(dev) ? dev->operstate : IF_OPER_DOWN;
struct net_device *br_dev = netdev_master_upper_dev_get(dev);
- nlh = nlmsg_put(skb, pid, seq, RTM_NEWLINK, sizeof(*ifm), NLM_F_MULTI);
+ nlh = nlmsg_put(skb, pid, seq, RTM_NEWLINK, sizeof(*ifm), nlflags);
if (nlh == NULL)
return -EMSGSIZE;
@@ -2969,7 +2976,8 @@ static int rtnl_bridge_getlink(struct sk_buff *skb, struct netlink_callback *cb)
if (br_dev && br_dev->netdev_ops->ndo_bridge_getlink) {
if (idx >= cb->args[0] &&
br_dev->netdev_ops->ndo_bridge_getlink(
- skb, portid, seq, dev, filter_mask) < 0)
+ skb, portid, seq, dev, filter_mask,
+ NLM_F_MULTI) < 0)
break;
idx++;
}
@@ -2977,7 +2985,8 @@ static int rtnl_bridge_getlink(struct sk_buff *skb, struct netlink_callback *cb)
if (ops->ndo_bridge_getlink) {
if (idx >= cb->args[0] &&
ops->ndo_bridge_getlink(skb, portid, seq, dev,
- filter_mask) < 0)
+ filter_mask,
+ NLM_F_MULTI) < 0)
break;
idx++;
}
@@ -3018,7 +3027,7 @@ static int rtnl_bridge_notify(struct net_device *dev)
goto errout;
}
- err = dev->netdev_ops->ndo_bridge_getlink(skb, 0, 0, dev, 0);
+ err = dev->netdev_ops->ndo_bridge_getlink(skb, 0, 0, dev, 0, 0);
if (err < 0)
goto errout;
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index 3cfff2a3d651..f3fe9bd9e672 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -347,94 +347,18 @@ struct sk_buff *build_skb(void *data, unsigned int frag_size)
}
EXPORT_SYMBOL(build_skb);
-struct netdev_alloc_cache {
- struct page_frag frag;
- /* we maintain a pagecount bias, so that we dont dirty cache line
- * containing page->_count every time we allocate a fragment.
- */
- unsigned int pagecnt_bias;
-};
-static DEFINE_PER_CPU(struct netdev_alloc_cache, netdev_alloc_cache);
-static DEFINE_PER_CPU(struct netdev_alloc_cache, napi_alloc_cache);
-
-static struct page *__page_frag_refill(struct netdev_alloc_cache *nc,
- gfp_t gfp_mask)
-{
- const unsigned int order = NETDEV_FRAG_PAGE_MAX_ORDER;
- struct page *page = NULL;
- gfp_t gfp = gfp_mask;
-
- if (order) {
- gfp_mask |= __GFP_COMP | __GFP_NOWARN | __GFP_NORETRY |
- __GFP_NOMEMALLOC;
- page = alloc_pages_node(NUMA_NO_NODE, gfp_mask, order);
- nc->frag.size = PAGE_SIZE << (page ? order : 0);
- }
-
- if (unlikely(!page))
- page = alloc_pages_node(NUMA_NO_NODE, gfp, 0);
-
- nc->frag.page = page;
-
- return page;
-}
-
-static void *__alloc_page_frag(struct netdev_alloc_cache __percpu *cache,
- unsigned int fragsz, gfp_t gfp_mask)
-{
- struct netdev_alloc_cache *nc = this_cpu_ptr(cache);
- struct page *page = nc->frag.page;
- unsigned int size;
- int offset;
-
- if (unlikely(!page)) {
-refill:
- page = __page_frag_refill(nc, gfp_mask);
- if (!page)
- return NULL;
-
- /* if size can vary use frag.size else just use PAGE_SIZE */
- size = NETDEV_FRAG_PAGE_MAX_ORDER ? nc->frag.size : PAGE_SIZE;
-
- /* Even if we own the page, we do not use atomic_set().
- * This would break get_page_unless_zero() users.
- */
- atomic_add(size - 1, &page->_count);
-
- /* reset page count bias and offset to start of new frag */
- nc->pagecnt_bias = size;
- nc->frag.offset = size;
- }
-
- offset = nc->frag.offset - fragsz;
- if (unlikely(offset < 0)) {
- if (!atomic_sub_and_test(nc->pagecnt_bias, &page->_count))
- goto refill;
-
- /* if size can vary use frag.size else just use PAGE_SIZE */
- size = NETDEV_FRAG_PAGE_MAX_ORDER ? nc->frag.size : PAGE_SIZE;
-
- /* OK, page count is 0, we can safely set it */
- atomic_set(&page->_count, size);
-
- /* reset page count bias and offset to start of new frag */
- nc->pagecnt_bias = size;
- offset = size - fragsz;
- }
-
- nc->pagecnt_bias--;
- nc->frag.offset = offset;
-
- return page_address(page) + offset;
-}
+static DEFINE_PER_CPU(struct page_frag_cache, netdev_alloc_cache);
+static DEFINE_PER_CPU(struct page_frag_cache, napi_alloc_cache);
static void *__netdev_alloc_frag(unsigned int fragsz, gfp_t gfp_mask)
{
+ struct page_frag_cache *nc;
unsigned long flags;
void *data;
local_irq_save(flags);
- data = __alloc_page_frag(&netdev_alloc_cache, fragsz, gfp_mask);
+ nc = this_cpu_ptr(&netdev_alloc_cache);
+ data = __alloc_page_frag(nc, fragsz, gfp_mask);
local_irq_restore(flags);
return data;
}
@@ -454,7 +378,9 @@ EXPORT_SYMBOL(netdev_alloc_frag);
static void *__napi_alloc_frag(unsigned int fragsz, gfp_t gfp_mask)
{
- return __alloc_page_frag(&napi_alloc_cache, fragsz, gfp_mask);
+ struct page_frag_cache *nc = this_cpu_ptr(&napi_alloc_cache);
+
+ return __alloc_page_frag(nc, fragsz, gfp_mask);
}
void *napi_alloc_frag(unsigned int fragsz)
@@ -464,76 +390,70 @@ void *napi_alloc_frag(unsigned int fragsz)
EXPORT_SYMBOL(napi_alloc_frag);
/**
- * __alloc_rx_skb - allocate an skbuff for rx
+ * __netdev_alloc_skb - allocate an skbuff for rx on a specific device
+ * @dev: network device to receive on
* @length: length to allocate
* @gfp_mask: get_free_pages mask, passed to alloc_skb
- * @flags: If SKB_ALLOC_RX is set, __GFP_MEMALLOC will be used for
- * allocations in case we have to fallback to __alloc_skb()
- * If SKB_ALLOC_NAPI is set, page fragment will be allocated
- * from napi_cache instead of netdev_cache.
*
* Allocate a new &sk_buff and assign it a usage count of one. The
- * buffer has unspecified headroom built in. Users should allocate
+ * buffer has NET_SKB_PAD headroom built in. Users should allocate
* the headroom they think they need without accounting for the
* built in space. The built in space is used for optimisations.
*
* %NULL is returned if there is no free memory.
*/
-static struct sk_buff *__alloc_rx_skb(unsigned int length, gfp_t gfp_mask,
- int flags)
+struct sk_buff *__netdev_alloc_skb(struct net_device *dev, unsigned int len,
+ gfp_t gfp_mask)
{
- struct sk_buff *skb = NULL;
- unsigned int fragsz = SKB_DATA_ALIGN(length) +
- SKB_DATA_ALIGN(sizeof(struct skb_shared_info));
+ struct page_frag_cache *nc;
+ unsigned long flags;
+ struct sk_buff *skb;
+ bool pfmemalloc;
+ void *data;
- if (fragsz <= PAGE_SIZE && !(gfp_mask & (__GFP_WAIT | GFP_DMA))) {
- void *data;
+ len += NET_SKB_PAD;
- if (sk_memalloc_socks())
- gfp_mask |= __GFP_MEMALLOC;
+ if ((len > SKB_WITH_OVERHEAD(PAGE_SIZE)) ||
+ (gfp_mask & (__GFP_WAIT | GFP_DMA))) {
+ skb = __alloc_skb(len, gfp_mask, SKB_ALLOC_RX, NUMA_NO_NODE);
+ if (!skb)
+ goto skb_fail;
+ goto skb_success;
+ }
- data = (flags & SKB_ALLOC_NAPI) ?
- __napi_alloc_frag(fragsz, gfp_mask) :
- __netdev_alloc_frag(fragsz, gfp_mask);
+ len += SKB_DATA_ALIGN(sizeof(struct skb_shared_info));
+ len = SKB_DATA_ALIGN(len);
- if (likely(data)) {
- skb = build_skb(data, fragsz);
- if (unlikely(!skb))
- put_page(virt_to_head_page(data));
- }
- } else {
- skb = __alloc_skb(length, gfp_mask,
- SKB_ALLOC_RX, NUMA_NO_NODE);
- }
- return skb;
-}
+ if (sk_memalloc_socks())
+ gfp_mask |= __GFP_MEMALLOC;
-/**
- * __netdev_alloc_skb - allocate an skbuff for rx on a specific device
- * @dev: network device to receive on
- * @length: length to allocate
- * @gfp_mask: get_free_pages mask, passed to alloc_skb
- *
- * Allocate a new &sk_buff and assign it a usage count of one. The
- * buffer has NET_SKB_PAD headroom built in. Users should allocate
- * the headroom they think they need without accounting for the
- * built in space. The built in space is used for optimisations.
- *
- * %NULL is returned if there is no free memory.
- */
-struct sk_buff *__netdev_alloc_skb(struct net_device *dev,
- unsigned int length, gfp_t gfp_mask)
-{
- struct sk_buff *skb;
+ local_irq_save(flags);
+
+ nc = this_cpu_ptr(&netdev_alloc_cache);
+ data = __alloc_page_frag(nc, len, gfp_mask);
+ pfmemalloc = nc->pfmemalloc;
- length += NET_SKB_PAD;
- skb = __alloc_rx_skb(length, gfp_mask, 0);
+ local_irq_restore(flags);
+
+ if (unlikely(!data))
+ return NULL;
- if (likely(skb)) {
- skb_reserve(skb, NET_SKB_PAD);
- skb->dev = dev;
+ skb = __build_skb(data, len);
+ if (unlikely(!skb)) {
+ skb_free_frag(data);
+ return NULL;
}
+ /* use OR instead of assignment to avoid clearing of bits in mask */
+ if (pfmemalloc)
+ skb->pfmemalloc = 1;
+ skb->head_frag = 1;
+
+skb_success:
+ skb_reserve(skb, NET_SKB_PAD);
+ skb->dev = dev;
+
+skb_fail:
return skb;
}
EXPORT_SYMBOL(__netdev_alloc_skb);
@@ -551,19 +471,49 @@ EXPORT_SYMBOL(__netdev_alloc_skb);
*
* %NULL is returned if there is no free memory.
*/
-struct sk_buff *__napi_alloc_skb(struct napi_struct *napi,
- unsigned int length, gfp_t gfp_mask)
+struct sk_buff *__napi_alloc_skb(struct napi_struct *napi, unsigned int len,
+ gfp_t gfp_mask)
{
+ struct page_frag_cache *nc = this_cpu_ptr(&napi_alloc_cache);
struct sk_buff *skb;
+ void *data;
- length += NET_SKB_PAD + NET_IP_ALIGN;
- skb = __alloc_rx_skb(length, gfp_mask, SKB_ALLOC_NAPI);
+ len += NET_SKB_PAD + NET_IP_ALIGN;
- if (likely(skb)) {
- skb_reserve(skb, NET_SKB_PAD + NET_IP_ALIGN);
- skb->dev = napi->dev;
+ if ((len > SKB_WITH_OVERHEAD(PAGE_SIZE)) ||
+ (gfp_mask & (__GFP_WAIT | GFP_DMA))) {
+ skb = __alloc_skb(len, gfp_mask, SKB_ALLOC_RX, NUMA_NO_NODE);
+ if (!skb)
+ goto skb_fail;
+ goto skb_success;
}
+ len += SKB_DATA_ALIGN(sizeof(struct skb_shared_info));
+ len = SKB_DATA_ALIGN(len);
+
+ if (sk_memalloc_socks())
+ gfp_mask |= __GFP_MEMALLOC;
+
+ data = __alloc_page_frag(nc, len, gfp_mask);
+ if (unlikely(!data))
+ return NULL;
+
+ skb = __build_skb(data, len);
+ if (unlikely(!skb)) {
+ skb_free_frag(data);
+ return NULL;
+ }
+
+ /* use OR instead of assignment to avoid clearing of bits in mask */
+ if (nc->pfmemalloc)
+ skb->pfmemalloc = 1;
+ skb->head_frag = 1;
+
+skb_success:
+ skb_reserve(skb, NET_SKB_PAD + NET_IP_ALIGN);
+ skb->dev = napi->dev;
+
+skb_fail:
return skb;
}
EXPORT_SYMBOL(__napi_alloc_skb);
@@ -611,10 +561,12 @@ static void skb_clone_fraglist(struct sk_buff *skb)
static void skb_free_head(struct sk_buff *skb)
{
+ unsigned char *head = skb->head;
+
if (skb->head_frag)
- put_page(virt_to_head_page(skb->head));
+ skb_free_frag(head);
else
- kfree(skb->head);
+ kfree(head);
}
static void skb_release_data(struct sk_buff *skb)
@@ -4030,6 +3982,93 @@ int skb_checksum_setup(struct sk_buff *skb, bool recalculate)
}
EXPORT_SYMBOL(skb_checksum_setup);
+/**
+ * skb_checksum_maybe_trim - maybe trims the given skb
+ * @skb: the skb to check
+ * @transport_len: the data length beyond the network header
+ *
+ * Checks whether the given skb has data beyond the given transport length.
+ * If so, returns a cloned skb trimmed to this transport length.
+ * Otherwise returns the provided skb. Returns NULL in error cases
+ * (e.g. transport_len exceeds skb length or out-of-memory).
+ *
+ * Caller needs to set the skb transport header and release the returned skb.
+ * Provided skb is consumed.
+ */
+static struct sk_buff *skb_checksum_maybe_trim(struct sk_buff *skb,
+ unsigned int transport_len)
+{
+ struct sk_buff *skb_chk;
+ unsigned int len = skb_transport_offset(skb) + transport_len;
+ int ret;
+
+ if (skb->len < len) {
+ kfree_skb(skb);
+ return NULL;
+ } else if (skb->len == len) {
+ return skb;
+ }
+
+ skb_chk = skb_clone(skb, GFP_ATOMIC);
+ kfree_skb(skb);
+
+ if (!skb_chk)
+ return NULL;
+
+ ret = pskb_trim_rcsum(skb_chk, len);
+ if (ret) {
+ kfree_skb(skb_chk);
+ return NULL;
+ }
+
+ return skb_chk;
+}
+
+/**
+ * skb_checksum_trimmed - validate checksum of an skb
+ * @skb: the skb to check
+ * @transport_len: the data length beyond the network header
+ * @skb_chkf: checksum function to use
+ *
+ * Applies the given checksum function skb_chkf to the provided skb.
+ * Returns a checked and maybe trimmed skb. Returns NULL on error.
+ *
+ * If the skb has data beyond the given transport length, then a
+ * trimmed & cloned skb is checked and returned.
+ *
+ * Caller needs to set the skb transport header and release the returned skb.
+ * Provided skb is consumed.
+ */
+struct sk_buff *skb_checksum_trimmed(struct sk_buff *skb,
+ unsigned int transport_len,
+ __sum16(*skb_chkf)(struct sk_buff *skb))
+{
+ struct sk_buff *skb_chk;
+ unsigned int offset = skb_transport_offset(skb);
+ __sum16 ret;
+
+ skb_chk = skb_checksum_maybe_trim(skb, transport_len);
+ if (!skb_chk)
+ return NULL;
+
+ if (!pskb_may_pull(skb_chk, offset)) {
+ kfree_skb(skb_chk);
+ return NULL;
+ }
+
+ __skb_pull(skb_chk, offset);
+ ret = skb_chkf(skb_chk);
+ __skb_push(skb_chk, offset);
+
+ if (ret) {
+ kfree_skb(skb_chk);
+ return NULL;
+ }
+
+ return skb_chk;
+}
+EXPORT_SYMBOL(skb_checksum_trimmed);
+
void __skb_warn_lro_forwarding(const struct sk_buff *skb)
{
net_warn_ratelimited("%s: received packets cannot be forwarded while LRO is enabled\n",
diff --git a/net/core/sock.c b/net/core/sock.c
index e891bcf325ca..29124fcdc42a 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -1396,9 +1396,10 @@ EXPORT_SYMBOL_GPL(sock_update_netprioidx);
* @family: protocol family
* @priority: for allocation (%GFP_KERNEL, %GFP_ATOMIC, etc)
* @prot: struct proto associated with this new sock instance
+ * @kern: is this to be a kernel socket?
*/
struct sock *sk_alloc(struct net *net, int family, gfp_t priority,
- struct proto *prot)
+ struct proto *prot, int kern)
{
struct sock *sk;
@@ -1411,7 +1412,10 @@ struct sock *sk_alloc(struct net *net, int family, gfp_t priority,
*/
sk->sk_prot = sk->sk_prot_creator = prot;
sock_lock_init(sk);
- sock_net_set(sk, get_net(net));
+ sk->sk_net_refcnt = kern ? 0 : 1;
+ if (likely(sk->sk_net_refcnt))
+ get_net(net);
+ sock_net_set(sk, net);
atomic_set(&sk->sk_wmem_alloc, 1);
sock_update_classid(sk);
@@ -1445,7 +1449,8 @@ static void __sk_free(struct sock *sk)
if (sk->sk_peer_cred)
put_cred(sk->sk_peer_cred);
put_pid(sk->sk_peer_pid);
- put_net(sock_net(sk));
+ if (likely(sk->sk_net_refcnt))
+ put_net(sock_net(sk));
sk_prot_free(sk->sk_prot_creator, sk);
}
@@ -1461,25 +1466,6 @@ void sk_free(struct sock *sk)
}
EXPORT_SYMBOL(sk_free);
-/*
- * Last sock_put should drop reference to sk->sk_net. It has already
- * been dropped in sk_change_net. Taking reference to stopping namespace
- * is not an option.
- * Take reference to a socket to remove it from hash _alive_ and after that
- * destroy it in the context of init_net.
- */
-void sk_release_kernel(struct sock *sk)
-{
- if (sk == NULL || sk->sk_socket == NULL)
- return;
-
- sock_hold(sk);
- sock_net_set(sk, get_net(&init_net));
- sock_release(sk->sk_socket);
- sock_put(sk);
-}
-EXPORT_SYMBOL(sk_release_kernel);
-
static void sk_update_clone(const struct sock *sk, struct sock *newsk)
{
if (mem_cgroup_sockets_enabled && sk->sk_cgrp)
@@ -2083,12 +2069,13 @@ EXPORT_SYMBOL(__sk_mem_schedule);
/**
* __sk_reclaim - reclaim memory_allocated
* @sk: socket
+ * @amount: number of bytes (rounded down to a SK_MEM_QUANTUM multiple)
*/
-void __sk_mem_reclaim(struct sock *sk)
+void __sk_mem_reclaim(struct sock *sk, int amount)
{
- sk_memory_allocated_sub(sk,
- sk->sk_forward_alloc >> SK_MEM_QUANTUM_SHIFT);
- sk->sk_forward_alloc &= SK_MEM_QUANTUM - 1;
+ amount >>= SK_MEM_QUANTUM_SHIFT;
+ sk_memory_allocated_sub(sk, amount);
+ sk->sk_forward_alloc -= amount << SK_MEM_QUANTUM_SHIFT;
if (sk_under_memory_pressure(sk) &&
(sk_memory_allocated(sk) < sk_prot_mem_limits(sk, 0)))
diff --git a/net/core/stream.c b/net/core/stream.c
index 301c05f26060..d70f77a0c889 100644
--- a/net/core/stream.c
+++ b/net/core/stream.c
@@ -119,6 +119,7 @@ int sk_stream_wait_memory(struct sock *sk, long *timeo_p)
int err = 0;
long vm_wait = 0;
long current_timeo = *timeo_p;
+ bool noblock = (*timeo_p ? false : true);
DEFINE_WAIT(wait);
if (sk_stream_memory_free(sk))
@@ -131,8 +132,11 @@ int sk_stream_wait_memory(struct sock *sk, long *timeo_p)
if (sk->sk_err || (sk->sk_shutdown & SEND_SHUTDOWN))
goto do_error;
- if (!*timeo_p)
+ if (!*timeo_p) {
+ if (noblock)
+ set_bit(SOCK_NOSPACE, &sk->sk_socket->flags);
goto do_nonblock;
+ }
if (signal_pending(current))
goto do_interrupted;
clear_bit(SOCK_ASYNC_NOSPACE, &sk->sk_socket->flags);
diff --git a/net/decnet/af_decnet.c b/net/decnet/af_decnet.c
index 754484b3cd0e..675cf94e04f8 100644
--- a/net/decnet/af_decnet.c
+++ b/net/decnet/af_decnet.c
@@ -468,10 +468,10 @@ static struct proto dn_proto = {
.obj_size = sizeof(struct dn_sock),
};
-static struct sock *dn_alloc_sock(struct net *net, struct socket *sock, gfp_t gfp)
+static struct sock *dn_alloc_sock(struct net *net, struct socket *sock, gfp_t gfp, int kern)
{
struct dn_scp *scp;
- struct sock *sk = sk_alloc(net, PF_DECnet, gfp, &dn_proto);
+ struct sock *sk = sk_alloc(net, PF_DECnet, gfp, &dn_proto, kern);
if (!sk)
goto out;
@@ -693,7 +693,7 @@ static int dn_create(struct net *net, struct socket *sock, int protocol,
}
- if ((sk = dn_alloc_sock(net, sock, GFP_KERNEL)) == NULL)
+ if ((sk = dn_alloc_sock(net, sock, GFP_KERNEL, kern)) == NULL)
return -ENOBUFS;
sk->sk_protocol = protocol;
@@ -1096,7 +1096,7 @@ static int dn_accept(struct socket *sock, struct socket *newsock, int flags)
cb = DN_SKB_CB(skb);
sk->sk_ack_backlog--;
- newsk = dn_alloc_sock(sock_net(sk), newsock, sk->sk_allocation);
+ newsk = dn_alloc_sock(sock_net(sk), newsock, sk->sk_allocation, 0);
if (newsk == NULL) {
release_sock(sk);
kfree_skb(skb);
diff --git a/net/dsa/dsa.c b/net/dsa/dsa.c
index 079a224471e7..e6f6cc3a1bcf 100644
--- a/net/dsa/dsa.c
+++ b/net/dsa/dsa.c
@@ -633,7 +633,7 @@ static int dsa_of_probe(struct device *dev)
if (cd->sw_addr > PHY_MAX_ADDR)
continue;
- if (!of_property_read_u32(np, "eeprom-length", &eeprom_len))
+ if (!of_property_read_u32(child, "eeprom-length", &eeprom_len))
cd->eeprom_len = eeprom_len;
for_each_available_child_of_node(child, port) {
diff --git a/net/dsa/slave.c b/net/dsa/slave.c
index 827cda560a55..04ffad311704 100644
--- a/net/dsa/slave.c
+++ b/net/dsa/slave.c
@@ -345,6 +345,24 @@ static int dsa_slave_stp_update(struct net_device *dev, u8 state)
return ret;
}
+static int dsa_slave_port_attr_set(struct net_device *dev,
+ struct switchdev_attr *attr)
+{
+ int ret = 0;
+
+ switch (attr->id) {
+ case SWITCHDEV_ATTR_PORT_STP_STATE:
+ if (attr->trans == SWITCHDEV_TRANS_COMMIT)
+ ret = dsa_slave_stp_update(dev, attr->u.stp_state);
+ break;
+ default:
+ ret = -EOPNOTSUPP;
+ break;
+ }
+
+ return ret;
+}
+
static int dsa_slave_bridge_port_join(struct net_device *dev,
struct net_device *br)
{
@@ -382,14 +400,20 @@ static int dsa_slave_bridge_port_leave(struct net_device *dev)
return ret;
}
-static int dsa_slave_parent_id_get(struct net_device *dev,
- struct netdev_phys_item_id *psid)
+static int dsa_slave_port_attr_get(struct net_device *dev,
+ struct switchdev_attr *attr)
{
struct dsa_slave_priv *p = netdev_priv(dev);
struct dsa_switch *ds = p->parent;
- psid->id_len = sizeof(ds->index);
- memcpy(&psid->id, &ds->index, psid->id_len);
+ switch (attr->id) {
+ case SWITCHDEV_ATTR_PORT_PARENT_ID:
+ attr->u.ppid.id_len = sizeof(ds->index);
+ memcpy(&attr->u.ppid.id, &ds->index, attr->u.ppid.id_len);
+ break;
+ default:
+ return -EOPNOTSUPP;
+ }
return 0;
}
@@ -675,9 +699,9 @@ static const struct net_device_ops dsa_slave_netdev_ops = {
.ndo_get_iflink = dsa_slave_get_iflink,
};
-static const struct swdev_ops dsa_slave_swdev_ops = {
- .swdev_parent_id_get = dsa_slave_parent_id_get,
- .swdev_port_stp_update = dsa_slave_stp_update,
+static const struct switchdev_ops dsa_slave_switchdev_ops = {
+ .switchdev_port_attr_get = dsa_slave_port_attr_get,
+ .switchdev_port_attr_set = dsa_slave_port_attr_set,
};
static void dsa_slave_adjust_link(struct net_device *dev)
@@ -810,12 +834,19 @@ static int dsa_slave_phy_setup(struct dsa_slave_priv *p,
return 0;
}
+static struct lock_class_key dsa_slave_netdev_xmit_lock_key;
+static void dsa_slave_set_lockdep_class_one(struct net_device *dev,
+ struct netdev_queue *txq,
+ void *_unused)
+{
+ lockdep_set_class(&txq->_xmit_lock,
+ &dsa_slave_netdev_xmit_lock_key);
+}
+
int dsa_slave_suspend(struct net_device *slave_dev)
{
struct dsa_slave_priv *p = netdev_priv(slave_dev);
- netif_device_detach(slave_dev);
-
if (p->phy) {
phy_stop(p->phy);
p->old_pause = -1;
@@ -859,7 +890,10 @@ int dsa_slave_create(struct dsa_switch *ds, struct device *parent,
eth_hw_addr_inherit(slave_dev, master);
slave_dev->tx_queue_len = 0;
slave_dev->netdev_ops = &dsa_slave_netdev_ops;
- slave_dev->swdev_ops = &dsa_slave_swdev_ops;
+ slave_dev->switchdev_ops = &dsa_slave_switchdev_ops;
+
+ netdev_for_each_tx_queue(slave_dev, dsa_slave_set_lockdep_class_one,
+ NULL);
SET_NETDEV_DEV(slave_dev, parent);
slave_dev->dev.of_node = ds->pd->port_dn[port];
diff --git a/net/ethernet/eth.c b/net/ethernet/eth.c
index f3bad41d725f..c3325bd2f3fb 100644
--- a/net/ethernet/eth.c
+++ b/net/ethernet/eth.c
@@ -58,6 +58,7 @@
#include <net/ipv6.h>
#include <net/ip.h>
#include <net/dsa.h>
+#include <net/flow_dissector.h>
#include <linux/uaccess.h>
__setup("ether=", netdev_boot_setup);
@@ -130,9 +131,9 @@ u32 eth_get_headlen(void *data, unsigned int len)
return len;
/* parse any remaining L2/L3 headers, check for L4 */
- if (!__skb_flow_dissect(NULL, &keys, data,
- eth->h_proto, sizeof(*eth), len))
- return max_t(u32, keys.thoff, sizeof(*eth));
+ if (!skb_flow_dissect_flow_keys_buf(&keys, data, eth->h_proto,
+ sizeof(*eth), len))
+ return max_t(u32, keys.basic.thoff, sizeof(*eth));
/* parse for any L4 headers */
return min_t(u32, __skb_get_poff(NULL, data, &keys, len), len);
@@ -156,10 +157,11 @@ __be16 eth_type_trans(struct sk_buff *skb, struct net_device *dev)
skb->dev = dev;
skb_reset_mac_header(skb);
+
+ eth = (struct ethhdr *)skb->data;
skb_pull_inline(skb, ETH_HLEN);
- eth = eth_hdr(skb);
- if (unlikely(is_multicast_ether_addr(eth->h_dest))) {
+ if (unlikely(is_multicast_ether_addr_64bits(eth->h_dest))) {
if (ether_addr_equal_64bits(eth->h_dest, dev->broadcast))
skb->pkt_type = PACKET_BROADCAST;
else
@@ -178,7 +180,7 @@ __be16 eth_type_trans(struct sk_buff *skb, struct net_device *dev)
if (unlikely(netdev_uses_dsa(dev)))
return htons(ETH_P_XDSA);
- if (likely(ntohs(eth->h_proto) >= ETH_P_802_3_MIN))
+ if (likely(eth_proto_is_802_3(eth->h_proto)))
return eth->h_proto;
/*
diff --git a/net/ieee802154/Makefile b/net/ieee802154/Makefile
index 05dab2957cd4..4adfd4d5471b 100644
--- a/net/ieee802154/Makefile
+++ b/net/ieee802154/Makefile
@@ -3,7 +3,9 @@ obj-$(CONFIG_IEEE802154_SOCKET) += ieee802154_socket.o
obj-y += 6lowpan/
ieee802154-y := netlink.o nl-mac.o nl-phy.o nl_policy.o core.o \
- header_ops.o sysfs.o nl802154.o
+ header_ops.o sysfs.o nl802154.o trace.o
ieee802154_socket-y := socket.o
+CFLAGS_trace.o := -I$(src)
+
ccflags-y += -D__CHECK_ENDIAN__
diff --git a/net/ieee802154/nl-phy.c b/net/ieee802154/nl-phy.c
index 1b9d25f6e898..346c6665d25e 100644
--- a/net/ieee802154/nl-phy.c
+++ b/net/ieee802154/nl-phy.c
@@ -175,6 +175,7 @@ int ieee802154_add_iface(struct sk_buff *skb, struct genl_info *info)
int rc = -ENOBUFS;
struct net_device *dev;
int type = __IEEE802154_DEV_INVALID;
+ unsigned char name_assign_type;
pr_debug("%s\n", __func__);
@@ -190,8 +191,10 @@ int ieee802154_add_iface(struct sk_buff *skb, struct genl_info *info)
if (devname[nla_len(info->attrs[IEEE802154_ATTR_DEV_NAME]) - 1]
!= '\0')
return -EINVAL; /* phy name should be null-terminated */
+ name_assign_type = NET_NAME_USER;
} else {
devname = "wpan%d";
+ name_assign_type = NET_NAME_ENUM;
}
if (strlen(devname) >= IFNAMSIZ)
@@ -221,7 +224,7 @@ int ieee802154_add_iface(struct sk_buff *skb, struct genl_info *info)
}
dev = rdev_add_virtual_intf_deprecated(wpan_phy_to_rdev(phy), devname,
- type);
+ name_assign_type, type);
if (IS_ERR(dev)) {
rc = PTR_ERR(dev);
goto nla_put_failure;
diff --git a/net/ieee802154/nl802154.c b/net/ieee802154/nl802154.c
index a4daf91b8d0a..f3c12f6a4a39 100644
--- a/net/ieee802154/nl802154.c
+++ b/net/ieee802154/nl802154.c
@@ -589,7 +589,7 @@ static int nl802154_new_interface(struct sk_buff *skb, struct genl_info *info)
return rdev_add_virtual_intf(rdev,
nla_data(info->attrs[NL802154_ATTR_IFNAME]),
- type, extended_addr);
+ NET_NAME_USER, type, extended_addr);
}
static int nl802154_del_interface(struct sk_buff *skb, struct genl_info *info)
diff --git a/net/ieee802154/rdev-ops.h b/net/ieee802154/rdev-ops.h
index 7c46732fad2b..7b5a9dd94fe5 100644
--- a/net/ieee802154/rdev-ops.h
+++ b/net/ieee802154/rdev-ops.h
@@ -4,13 +4,16 @@
#include <net/cfg802154.h>
#include "core.h"
+#include "trace.h"
static inline struct net_device *
rdev_add_virtual_intf_deprecated(struct cfg802154_registered_device *rdev,
- const char *name, int type)
+ const char *name,
+ unsigned char name_assign_type,
+ int type)
{
return rdev->ops->add_virtual_intf_deprecated(&rdev->wpan_phy, name,
- type);
+ name_assign_type, type);
}
static inline void
@@ -22,75 +25,131 @@ rdev_del_virtual_intf_deprecated(struct cfg802154_registered_device *rdev,
static inline int
rdev_add_virtual_intf(struct cfg802154_registered_device *rdev, char *name,
+ unsigned char name_assign_type,
enum nl802154_iftype type, __le64 extended_addr)
{
- return rdev->ops->add_virtual_intf(&rdev->wpan_phy, name, type,
+ int ret;
+
+ trace_802154_rdev_add_virtual_intf(&rdev->wpan_phy, name, type,
extended_addr);
+ ret = rdev->ops->add_virtual_intf(&rdev->wpan_phy, name,
+ name_assign_type, type,
+ extended_addr);
+ trace_802154_rdev_return_int(&rdev->wpan_phy, ret);
+ return ret;
}
static inline int
rdev_del_virtual_intf(struct cfg802154_registered_device *rdev,
struct wpan_dev *wpan_dev)
{
- return rdev->ops->del_virtual_intf(&rdev->wpan_phy, wpan_dev);
+ int ret;
+
+ trace_802154_rdev_del_virtual_intf(&rdev->wpan_phy, wpan_dev);
+ ret = rdev->ops->del_virtual_intf(&rdev->wpan_phy, wpan_dev);
+ trace_802154_rdev_return_int(&rdev->wpan_phy, ret);
+ return ret;
}
static inline int
rdev_set_channel(struct cfg802154_registered_device *rdev, u8 page, u8 channel)
{
- return rdev->ops->set_channel(&rdev->wpan_phy, page, channel);
+ int ret;
+
+ trace_802154_rdev_set_channel(&rdev->wpan_phy, page, channel);
+ ret = rdev->ops->set_channel(&rdev->wpan_phy, page, channel);
+ trace_802154_rdev_return_int(&rdev->wpan_phy, ret);
+ return ret;
}
static inline int
rdev_set_cca_mode(struct cfg802154_registered_device *rdev,
const struct wpan_phy_cca *cca)
{
- return rdev->ops->set_cca_mode(&rdev->wpan_phy, cca);
+ int ret;
+
+ trace_802154_rdev_set_cca_mode(&rdev->wpan_phy, cca);
+ ret = rdev->ops->set_cca_mode(&rdev->wpan_phy, cca);
+ trace_802154_rdev_return_int(&rdev->wpan_phy, ret);
+ return ret;
}
static inline int
rdev_set_pan_id(struct cfg802154_registered_device *rdev,
struct wpan_dev *wpan_dev, __le16 pan_id)
{
- return rdev->ops->set_pan_id(&rdev->wpan_phy, wpan_dev, pan_id);
+ int ret;
+
+ trace_802154_rdev_set_pan_id(&rdev->wpan_phy, wpan_dev, pan_id);
+ ret = rdev->ops->set_pan_id(&rdev->wpan_phy, wpan_dev, pan_id);
+ trace_802154_rdev_return_int(&rdev->wpan_phy, ret);
+ return ret;
}
static inline int
rdev_set_short_addr(struct cfg802154_registered_device *rdev,
struct wpan_dev *wpan_dev, __le16 short_addr)
{
- return rdev->ops->set_short_addr(&rdev->wpan_phy, wpan_dev, short_addr);
+ int ret;
+
+ trace_802154_rdev_set_short_addr(&rdev->wpan_phy, wpan_dev, short_addr);
+ ret = rdev->ops->set_short_addr(&rdev->wpan_phy, wpan_dev, short_addr);
+ trace_802154_rdev_return_int(&rdev->wpan_phy, ret);
+ return ret;
}
static inline int
rdev_set_backoff_exponent(struct cfg802154_registered_device *rdev,
struct wpan_dev *wpan_dev, u8 min_be, u8 max_be)
{
- return rdev->ops->set_backoff_exponent(&rdev->wpan_phy, wpan_dev,
+ int ret;
+
+ trace_802154_rdev_set_backoff_exponent(&rdev->wpan_phy, wpan_dev,
min_be, max_be);
+ ret = rdev->ops->set_backoff_exponent(&rdev->wpan_phy, wpan_dev,
+ min_be, max_be);
+ trace_802154_rdev_return_int(&rdev->wpan_phy, ret);
+ return ret;
}
static inline int
rdev_set_max_csma_backoffs(struct cfg802154_registered_device *rdev,
struct wpan_dev *wpan_dev, u8 max_csma_backoffs)
{
- return rdev->ops->set_max_csma_backoffs(&rdev->wpan_phy, wpan_dev,
- max_csma_backoffs);
+ int ret;
+
+ trace_802154_rdev_set_csma_backoffs(&rdev->wpan_phy, wpan_dev,
+ max_csma_backoffs);
+ ret = rdev->ops->set_max_csma_backoffs(&rdev->wpan_phy, wpan_dev,
+ max_csma_backoffs);
+ trace_802154_rdev_return_int(&rdev->wpan_phy, ret);
+ return ret;
}
static inline int
rdev_set_max_frame_retries(struct cfg802154_registered_device *rdev,
struct wpan_dev *wpan_dev, s8 max_frame_retries)
{
- return rdev->ops->set_max_frame_retries(&rdev->wpan_phy, wpan_dev,
+ int ret;
+
+ trace_802154_rdev_set_max_frame_retries(&rdev->wpan_phy, wpan_dev,
max_frame_retries);
+ ret = rdev->ops->set_max_frame_retries(&rdev->wpan_phy, wpan_dev,
+ max_frame_retries);
+ trace_802154_rdev_return_int(&rdev->wpan_phy, ret);
+ return ret;
}
static inline int
rdev_set_lbt_mode(struct cfg802154_registered_device *rdev,
struct wpan_dev *wpan_dev, bool mode)
{
- return rdev->ops->set_lbt_mode(&rdev->wpan_phy, wpan_dev, mode);
+ int ret;
+
+ trace_802154_rdev_set_lbt_mode(&rdev->wpan_phy, wpan_dev, mode);
+ ret = rdev->ops->set_lbt_mode(&rdev->wpan_phy, wpan_dev, mode);
+ trace_802154_rdev_return_int(&rdev->wpan_phy, ret);
+ return ret;
}
#endif /* __CFG802154_RDEV_OPS */
diff --git a/net/ieee802154/socket.c b/net/ieee802154/socket.c
index b60c65f70346..7aaaf967df58 100644
--- a/net/ieee802154/socket.c
+++ b/net/ieee802154/socket.c
@@ -1014,7 +1014,7 @@ static int ieee802154_create(struct net *net, struct socket *sock,
}
rc = -ENOMEM;
- sk = sk_alloc(net, PF_IEEE802154, GFP_KERNEL, proto);
+ sk = sk_alloc(net, PF_IEEE802154, GFP_KERNEL, proto, kern);
if (!sk)
goto out;
rc = 0;
diff --git a/net/ieee802154/trace.c b/net/ieee802154/trace.c
new file mode 100644
index 000000000000..95f997fad755
--- /dev/null
+++ b/net/ieee802154/trace.c
@@ -0,0 +1,7 @@
+#include <linux/module.h>
+
+#ifndef __CHECKER__
+#define CREATE_TRACE_POINTS
+#include "trace.h"
+
+#endif
diff --git a/net/ieee802154/trace.h b/net/ieee802154/trace.h
new file mode 100644
index 000000000000..5ac25eb6ed17
--- /dev/null
+++ b/net/ieee802154/trace.h
@@ -0,0 +1,247 @@
+/* Based on net/wireless/tracing.h */
+
+#undef TRACE_SYSTEM
+#define TRACE_SYSTEM cfg802154
+
+#if !defined(__RDEV_CFG802154_OPS_TRACE) || defined(TRACE_HEADER_MULTI_READ)
+#define __RDEV_CFG802154_OPS_TRACE
+
+#include <linux/tracepoint.h>
+
+#include <net/cfg802154.h>
+
+#define MAXNAME 32
+#define WPAN_PHY_ENTRY __array(char, wpan_phy_name, MAXNAME)
+#define WPAN_PHY_ASSIGN strlcpy(__entry->wpan_phy_name, \
+ wpan_phy_name(wpan_phy), \
+ MAXNAME)
+#define WPAN_PHY_PR_FMT "%s"
+#define WPAN_PHY_PR_ARG __entry->wpan_phy_name
+
+#define WPAN_DEV_ENTRY __field(u32, identifier)
+#define WPAN_DEV_ASSIGN (__entry->identifier) = (!IS_ERR_OR_NULL(wpan_dev) \
+ ? wpan_dev->identifier : 0)
+#define WPAN_DEV_PR_FMT "wpan_dev(%u)"
+#define WPAN_DEV_PR_ARG (__entry->identifier)
+
+#define WPAN_CCA_ENTRY __field(enum nl802154_cca_modes, cca_mode) \
+ __field(enum nl802154_cca_opts, cca_opt)
+#define WPAN_CCA_ASSIGN \
+ do { \
+ (__entry->cca_mode) = cca->mode; \
+ (__entry->cca_opt) = cca->opt; \
+ } while (0)
+#define WPAN_CCA_PR_FMT "cca_mode: %d, cca_opt: %d"
+#define WPAN_CCA_PR_ARG __entry->cca_mode, __entry->cca_opt
+
+#define BOOL_TO_STR(bo) (bo) ? "true" : "false"
+
+/*************************************************************
+ * rdev->ops traces *
+ *************************************************************/
+
+TRACE_EVENT(802154_rdev_add_virtual_intf,
+ TP_PROTO(struct wpan_phy *wpan_phy, char *name,
+ enum nl802154_iftype type, __le64 extended_addr),
+ TP_ARGS(wpan_phy, name, type, extended_addr),
+ TP_STRUCT__entry(
+ WPAN_PHY_ENTRY
+ __string(vir_intf_name, name ? name : "<noname>")
+ __field(enum nl802154_iftype, type)
+ __field(__le64, extended_addr)
+ ),
+ TP_fast_assign(
+ WPAN_PHY_ASSIGN;
+ __assign_str(vir_intf_name, name ? name : "<noname>");
+ __entry->type = type;
+ __entry->extended_addr = extended_addr;
+ ),
+ TP_printk(WPAN_PHY_PR_FMT ", virtual intf name: %s, type: %d, ea %llx",
+ WPAN_PHY_PR_ARG, __get_str(vir_intf_name), __entry->type,
+ __le64_to_cpu(__entry->extended_addr))
+);
+
+TRACE_EVENT(802154_rdev_del_virtual_intf,
+ TP_PROTO(struct wpan_phy *wpan_phy, struct wpan_dev *wpan_dev),
+ TP_ARGS(wpan_phy, wpan_dev),
+ TP_STRUCT__entry(
+ WPAN_PHY_ENTRY
+ WPAN_DEV_ENTRY
+ ),
+ TP_fast_assign(
+ WPAN_PHY_ASSIGN;
+ WPAN_DEV_ASSIGN;
+ ),
+ TP_printk(WPAN_PHY_PR_FMT ", " WPAN_DEV_PR_FMT, WPAN_PHY_PR_ARG,
+ WPAN_DEV_PR_ARG)
+);
+
+TRACE_EVENT(802154_rdev_set_channel,
+ TP_PROTO(struct wpan_phy *wpan_phy, u8 page, u8 channel),
+ TP_ARGS(wpan_phy, page, channel),
+ TP_STRUCT__entry(
+ WPAN_PHY_ENTRY
+ __field(u8, page)
+ __field(u8, channel)
+ ),
+ TP_fast_assign(
+ WPAN_PHY_ASSIGN;
+ __entry->page = page;
+ __entry->channel = channel;
+ ),
+ TP_printk(WPAN_PHY_PR_FMT ", page: %d, channel: %d", WPAN_PHY_PR_ARG,
+ __entry->page, __entry->channel)
+);
+
+TRACE_EVENT(802154_rdev_set_cca_mode,
+ TP_PROTO(struct wpan_phy *wpan_phy, const struct wpan_phy_cca *cca),
+ TP_ARGS(wpan_phy, cca),
+ TP_STRUCT__entry(
+ WPAN_PHY_ENTRY
+ WPAN_CCA_ENTRY
+ ),
+ TP_fast_assign(
+ WPAN_PHY_ASSIGN;
+ WPAN_CCA_ASSIGN;
+ ),
+ TP_printk(WPAN_PHY_PR_FMT ", " WPAN_CCA_PR_FMT, WPAN_PHY_PR_ARG,
+ WPAN_CCA_PR_ARG)
+);
+
+DECLARE_EVENT_CLASS(802154_le16_template,
+ TP_PROTO(struct wpan_phy *wpan_phy, struct wpan_dev *wpan_dev,
+ __le16 le16arg),
+ TP_ARGS(wpan_phy, wpan_dev, le16arg),
+ TP_STRUCT__entry(
+ WPAN_PHY_ENTRY
+ WPAN_DEV_ENTRY
+ __field(__le16, le16arg)
+ ),
+ TP_fast_assign(
+ WPAN_PHY_ASSIGN;
+ WPAN_DEV_ASSIGN;
+ __entry->le16arg = le16arg;
+ ),
+ TP_printk(WPAN_PHY_PR_FMT ", " WPAN_DEV_PR_FMT ", pan id: 0x%04x",
+ WPAN_PHY_PR_ARG, WPAN_DEV_PR_ARG,
+ __le16_to_cpu(__entry->le16arg))
+);
+
+DEFINE_EVENT(802154_le16_template, 802154_rdev_set_pan_id,
+ TP_PROTO(struct wpan_phy *wpan_phy, struct wpan_dev *wpan_dev,
+ __le16 le16arg),
+ TP_ARGS(wpan_phy, wpan_dev, le16arg)
+);
+
+DEFINE_EVENT_PRINT(802154_le16_template, 802154_rdev_set_short_addr,
+ TP_PROTO(struct wpan_phy *wpan_phy, struct wpan_dev *wpan_dev,
+ __le16 le16arg),
+ TP_ARGS(wpan_phy, wpan_dev, le16arg),
+ TP_printk(WPAN_PHY_PR_FMT ", " WPAN_DEV_PR_FMT ", sa: 0x%04x",
+ WPAN_PHY_PR_ARG, WPAN_DEV_PR_ARG,
+ __le16_to_cpu(__entry->le16arg))
+);
+
+TRACE_EVENT(802154_rdev_set_backoff_exponent,
+ TP_PROTO(struct wpan_phy *wpan_phy, struct wpan_dev *wpan_dev,
+ u8 min_be, u8 max_be),
+ TP_ARGS(wpan_phy, wpan_dev, min_be, max_be),
+ TP_STRUCT__entry(
+ WPAN_PHY_ENTRY
+ WPAN_DEV_ENTRY
+ __field(u8, min_be)
+ __field(u8, max_be)
+ ),
+ TP_fast_assign(
+ WPAN_PHY_ASSIGN;
+ WPAN_DEV_ASSIGN;
+ __entry->min_be = min_be;
+ __entry->max_be = max_be;
+ ),
+
+ TP_printk(WPAN_PHY_PR_FMT ", " WPAN_DEV_PR_FMT
+ ", min be: %d, max_be: %d", WPAN_PHY_PR_ARG,
+ WPAN_DEV_PR_ARG, __entry->min_be, __entry->max_be)
+);
+
+TRACE_EVENT(802154_rdev_set_csma_backoffs,
+ TP_PROTO(struct wpan_phy *wpan_phy, struct wpan_dev *wpan_dev,
+ u8 max_csma_backoffs),
+ TP_ARGS(wpan_phy, wpan_dev, max_csma_backoffs),
+ TP_STRUCT__entry(
+ WPAN_PHY_ENTRY
+ WPAN_DEV_ENTRY
+ __field(u8, max_csma_backoffs)
+ ),
+ TP_fast_assign(
+ WPAN_PHY_ASSIGN;
+ WPAN_DEV_ASSIGN;
+ __entry->max_csma_backoffs = max_csma_backoffs;
+ ),
+
+ TP_printk(WPAN_PHY_PR_FMT ", " WPAN_DEV_PR_FMT
+ ", max csma backoffs: %d", WPAN_PHY_PR_ARG,
+ WPAN_DEV_PR_ARG, __entry->max_csma_backoffs)
+);
+
+TRACE_EVENT(802154_rdev_set_max_frame_retries,
+ TP_PROTO(struct wpan_phy *wpan_phy, struct wpan_dev *wpan_dev,
+ s8 max_frame_retries),
+ TP_ARGS(wpan_phy, wpan_dev, max_frame_retries),
+ TP_STRUCT__entry(
+ WPAN_PHY_ENTRY
+ WPAN_DEV_ENTRY
+ __field(s8, max_frame_retries)
+ ),
+ TP_fast_assign(
+ WPAN_PHY_ASSIGN;
+ WPAN_DEV_ASSIGN;
+ __entry->max_frame_retries = max_frame_retries;
+ ),
+
+ TP_printk(WPAN_PHY_PR_FMT ", " WPAN_DEV_PR_FMT
+ ", max frame retries: %d", WPAN_PHY_PR_ARG,
+ WPAN_DEV_PR_ARG, __entry->max_frame_retries)
+);
+
+TRACE_EVENT(802154_rdev_set_lbt_mode,
+ TP_PROTO(struct wpan_phy *wpan_phy, struct wpan_dev *wpan_dev,
+ bool mode),
+ TP_ARGS(wpan_phy, wpan_dev, mode),
+ TP_STRUCT__entry(
+ WPAN_PHY_ENTRY
+ WPAN_DEV_ENTRY
+ __field(bool, mode)
+ ),
+ TP_fast_assign(
+ WPAN_PHY_ASSIGN;
+ WPAN_DEV_ASSIGN;
+ __entry->mode = mode;
+ ),
+ TP_printk(WPAN_PHY_PR_FMT ", " WPAN_DEV_PR_FMT
+ ", lbt mode: %s", WPAN_PHY_PR_ARG,
+ WPAN_DEV_PR_ARG, BOOL_TO_STR(__entry->mode))
+);
+
+TRACE_EVENT(802154_rdev_return_int,
+ TP_PROTO(struct wpan_phy *wpan_phy, int ret),
+ TP_ARGS(wpan_phy, ret),
+ TP_STRUCT__entry(
+ WPAN_PHY_ENTRY
+ __field(int, ret)
+ ),
+ TP_fast_assign(
+ WPAN_PHY_ASSIGN;
+ __entry->ret = ret;
+ ),
+ TP_printk(WPAN_PHY_PR_FMT ", returned: %d", WPAN_PHY_PR_ARG,
+ __entry->ret)
+);
+
+#endif /* !__RDEV_CFG802154_OPS_TRACE || TRACE_HEADER_MULTI_READ */
+
+#undef TRACE_INCLUDE_PATH
+#define TRACE_INCLUDE_PATH .
+#undef TRACE_INCLUDE_FILE
+#define TRACE_INCLUDE_FILE trace
+#include <trace/define_trace.h>
diff --git a/net/ipv4/Kconfig b/net/ipv4/Kconfig
index bd2901604842..d83071dccd74 100644
--- a/net/ipv4/Kconfig
+++ b/net/ipv4/Kconfig
@@ -331,8 +331,8 @@ config NET_FOU_IP_TUNNELS
When this option is enabled IP tunnels can be configured to use
FOU or GUE encapsulation.
-config GENEVE
- tristate "Generic Network Virtualization Encapsulation (Geneve)"
+config GENEVE_CORE
+ tristate "Generic Network Virtualization Encapsulation library"
depends on INET
select NET_UDP_TUNNEL
---help---
diff --git a/net/ipv4/Makefile b/net/ipv4/Makefile
index 518c04ed666e..b36236dd6014 100644
--- a/net/ipv4/Makefile
+++ b/net/ipv4/Makefile
@@ -56,7 +56,7 @@ obj-$(CONFIG_TCP_CONG_YEAH) += tcp_yeah.o
obj-$(CONFIG_TCP_CONG_ILLINOIS) += tcp_illinois.o
obj-$(CONFIG_MEMCG_KMEM) += tcp_memcontrol.o
obj-$(CONFIG_NETLABEL) += cipso_ipv4.o
-obj-$(CONFIG_GENEVE) += geneve.o
+obj-$(CONFIG_GENEVE_CORE) += geneve_core.o
obj-$(CONFIG_XFRM) += xfrm4_policy.o xfrm4_state.o xfrm4_input.o \
xfrm4_output.o xfrm4_protocol.o
diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c
index 8b47a4d79d04..235d36afece3 100644
--- a/net/ipv4/af_inet.c
+++ b/net/ipv4/af_inet.c
@@ -317,7 +317,7 @@ lookup_protocol:
WARN_ON(!answer_prot->slab);
err = -ENOBUFS;
- sk = sk_alloc(net, PF_INET, GFP_KERNEL, answer_prot);
+ sk = sk_alloc(net, PF_INET, GFP_KERNEL, answer_prot, kern);
if (!sk)
goto out;
@@ -1430,7 +1430,7 @@ int inet_ctl_sock_create(struct sock **sk, unsigned short family,
struct net *net)
{
struct socket *sock;
- int rc = sock_create_kern(family, type, protocol, &sock);
+ int rc = sock_create_kern(net, family, type, protocol, &sock);
if (rc == 0) {
*sk = sock->sk;
@@ -1440,8 +1440,6 @@ int inet_ctl_sock_create(struct sock **sk, unsigned short family,
* we do not wish this socket to see incoming packets.
*/
(*sk)->sk_prot->unhash(*sk);
-
- sk_change_net(*sk, net);
}
return rc;
}
diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c
index 8d695b6659c7..28ec3c1823bf 100644
--- a/net/ipv4/fib_semantics.c
+++ b/net/ipv4/fib_semantics.c
@@ -713,8 +713,6 @@ static void fib_info_hash_move(struct hlist_head *new_info_hash,
struct hlist_head *dest;
unsigned int new_hash;
- hlist_del(&fi->fib_hash);
-
new_hash = fib_info_hashfn(fi);
dest = &new_info_hash[new_hash];
hlist_add_head(&fi->fib_hash, dest);
@@ -731,8 +729,6 @@ static void fib_info_hash_move(struct hlist_head *new_info_hash,
struct hlist_head *ldest;
unsigned int new_hash;
- hlist_del(&fi->fib_lhash);
-
new_hash = fib_laddr_hashfn(fi->fib_prefsrc);
ldest = &new_laddrhash[new_hash];
hlist_add_head(&fi->fib_lhash, ldest);
diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c
index e13fcc602da2..5a5d9bdeaeb4 100644
--- a/net/ipv4/fib_trie.c
+++ b/net/ipv4/fib_trie.c
@@ -1164,14 +1164,15 @@ int fib_table_insert(struct fib_table *tb, struct fib_config *cfg)
state = fa->fa_state;
new_fa->fa_state = state & ~FA_S_ACCESSED;
new_fa->fa_slen = fa->fa_slen;
+ new_fa->tb_id = tb->tb_id;
- err = netdev_switch_fib_ipv4_add(key, plen, fi,
- new_fa->fa_tos,
- cfg->fc_type,
- cfg->fc_nlflags,
- tb->tb_id);
+ err = switchdev_fib_ipv4_add(key, plen, fi,
+ new_fa->fa_tos,
+ cfg->fc_type,
+ cfg->fc_nlflags,
+ tb->tb_id);
if (err) {
- netdev_switch_fib_ipv4_abort(fi);
+ switchdev_fib_ipv4_abort(fi);
kmem_cache_free(fn_alias_kmem, new_fa);
goto out;
}
@@ -1215,12 +1216,10 @@ int fib_table_insert(struct fib_table *tb, struct fib_config *cfg)
new_fa->tb_id = tb->tb_id;
/* (Optionally) offload fib entry to switch hardware. */
- err = netdev_switch_fib_ipv4_add(key, plen, fi, tos,
- cfg->fc_type,
- cfg->fc_nlflags,
- tb->tb_id);
+ err = switchdev_fib_ipv4_add(key, plen, fi, tos, cfg->fc_type,
+ cfg->fc_nlflags, tb->tb_id);
if (err) {
- netdev_switch_fib_ipv4_abort(fi);
+ switchdev_fib_ipv4_abort(fi);
goto out_free_new_fa;
}
@@ -1239,7 +1238,7 @@ succeeded:
return 0;
out_sw_fib_del:
- netdev_switch_fib_ipv4_del(key, plen, fi, tos, cfg->fc_type, tb->tb_id);
+ switchdev_fib_ipv4_del(key, plen, fi, tos, cfg->fc_type, tb->tb_id);
out_free_new_fa:
kmem_cache_free(fn_alias_kmem, new_fa);
out:
@@ -1517,8 +1516,8 @@ int fib_table_delete(struct fib_table *tb, struct fib_config *cfg)
if (!fa_to_delete)
return -ESRCH;
- netdev_switch_fib_ipv4_del(key, plen, fa_to_delete->fa_info, tos,
- cfg->fc_type, tb->tb_id);
+ switchdev_fib_ipv4_del(key, plen, fa_to_delete->fa_info, tos,
+ cfg->fc_type, tb->tb_id);
rtmsg_fib(RTM_DELROUTE, htonl(key), fa_to_delete, plen, tb->tb_id,
&cfg->fc_nlinfo, 0);
@@ -1764,13 +1763,12 @@ void fib_table_flush_external(struct fib_table *tb)
/* record local slen */
slen = fa->fa_slen;
- if (!fi || !(fi->fib_flags & RTNH_F_EXTERNAL))
+ if (!fi || !(fi->fib_flags & RTNH_F_OFFLOAD))
continue;
- netdev_switch_fib_ipv4_del(n->key,
- KEYLENGTH - fa->fa_slen,
- fi, fa->fa_tos,
- fa->fa_type, tb->tb_id);
+ switchdev_fib_ipv4_del(n->key, KEYLENGTH - fa->fa_slen,
+ fi, fa->fa_tos, fa->fa_type,
+ tb->tb_id);
}
/* update leaf slen */
@@ -1835,10 +1833,9 @@ int fib_table_flush(struct fib_table *tb)
continue;
}
- netdev_switch_fib_ipv4_del(n->key,
- KEYLENGTH - fa->fa_slen,
- fi, fa->fa_tos,
- fa->fa_type, tb->tb_id);
+ switchdev_fib_ipv4_del(n->key, KEYLENGTH - fa->fa_slen,
+ fi, fa->fa_tos, fa->fa_type,
+ tb->tb_id);
hlist_del_rcu(&fa->fa_list);
fib_release_info(fa->fa_info);
alias_free_mem_rcu(fa);
diff --git a/net/ipv4/geneve.c b/net/ipv4/geneve_core.c
index 8986e63f3bda..311a4ba6950a 100644
--- a/net/ipv4/geneve.c
+++ b/net/ipv4/geneve_core.c
@@ -60,11 +60,6 @@ struct geneve_net {
static int geneve_net_id;
-static inline struct genevehdr *geneve_hdr(const struct sk_buff *skb)
-{
- return (struct genevehdr *)(udp_hdr(skb) + 1);
-}
-
static struct geneve_sock *geneve_find_sock(struct net *net,
sa_family_t family, __be16 port)
{
@@ -435,7 +430,7 @@ static int __init geneve_init_module(void)
if (rc)
return rc;
- pr_info("Geneve driver\n");
+ pr_info("Geneve core logic\n");
return 0;
}
@@ -449,5 +444,4 @@ module_exit(geneve_cleanup_module);
MODULE_LICENSE("GPL");
MODULE_AUTHOR("Jesse Gross <jesse@nicira.com>");
-MODULE_DESCRIPTION("Driver for GENEVE encapsulated traffic");
-MODULE_ALIAS_RTNL_LINK("geneve");
+MODULE_DESCRIPTION("Driver library for GENEVE encapsulated traffic");
diff --git a/net/ipv4/igmp.c b/net/ipv4/igmp.c
index a3a697f5ffba..651cdf648ec4 100644
--- a/net/ipv4/igmp.c
+++ b/net/ipv4/igmp.c
@@ -1339,6 +1339,168 @@ out:
}
EXPORT_SYMBOL(ip_mc_inc_group);
+static int ip_mc_check_iphdr(struct sk_buff *skb)
+{
+ const struct iphdr *iph;
+ unsigned int len;
+ unsigned int offset = skb_network_offset(skb) + sizeof(*iph);
+
+ if (!pskb_may_pull(skb, offset))
+ return -EINVAL;
+
+ iph = ip_hdr(skb);
+
+ if (iph->version != 4 || ip_hdrlen(skb) < sizeof(*iph))
+ return -EINVAL;
+
+ offset += ip_hdrlen(skb) - sizeof(*iph);
+
+ if (!pskb_may_pull(skb, offset))
+ return -EINVAL;
+
+ iph = ip_hdr(skb);
+
+ if (unlikely(ip_fast_csum((u8 *)iph, iph->ihl)))
+ return -EINVAL;
+
+ len = skb_network_offset(skb) + ntohs(iph->tot_len);
+ if (skb->len < len || len < offset)
+ return -EINVAL;
+
+ skb_set_transport_header(skb, offset);
+
+ return 0;
+}
+
+static int ip_mc_check_igmp_reportv3(struct sk_buff *skb)
+{
+ unsigned int len = skb_transport_offset(skb);
+
+ len += sizeof(struct igmpv3_report);
+
+ return pskb_may_pull(skb, len) ? 0 : -EINVAL;
+}
+
+static int ip_mc_check_igmp_query(struct sk_buff *skb)
+{
+ unsigned int len = skb_transport_offset(skb);
+
+ len += sizeof(struct igmphdr);
+ if (skb->len < len)
+ return -EINVAL;
+
+ /* IGMPv{1,2}? */
+ if (skb->len != len) {
+ /* or IGMPv3? */
+ len += sizeof(struct igmpv3_query) - sizeof(struct igmphdr);
+ if (skb->len < len || !pskb_may_pull(skb, len))
+ return -EINVAL;
+ }
+
+ /* RFC2236+RFC3376 (IGMPv2+IGMPv3) require the multicast link layer
+ * all-systems destination addresses (224.0.0.1) for general queries
+ */
+ if (!igmp_hdr(skb)->group &&
+ ip_hdr(skb)->daddr != htonl(INADDR_ALLHOSTS_GROUP))
+ return -EINVAL;
+
+ return 0;
+}
+
+static int ip_mc_check_igmp_msg(struct sk_buff *skb)
+{
+ switch (igmp_hdr(skb)->type) {
+ case IGMP_HOST_LEAVE_MESSAGE:
+ case IGMP_HOST_MEMBERSHIP_REPORT:
+ case IGMPV2_HOST_MEMBERSHIP_REPORT:
+ /* fall through */
+ return 0;
+ case IGMPV3_HOST_MEMBERSHIP_REPORT:
+ return ip_mc_check_igmp_reportv3(skb);
+ case IGMP_HOST_MEMBERSHIP_QUERY:
+ return ip_mc_check_igmp_query(skb);
+ default:
+ return -ENOMSG;
+ }
+}
+
+static inline __sum16 ip_mc_validate_checksum(struct sk_buff *skb)
+{
+ return skb_checksum_simple_validate(skb);
+}
+
+static int __ip_mc_check_igmp(struct sk_buff *skb, struct sk_buff **skb_trimmed)
+
+{
+ struct sk_buff *skb_chk;
+ unsigned int transport_len;
+ unsigned int len = skb_transport_offset(skb) + sizeof(struct igmphdr);
+ int ret;
+
+ transport_len = ntohs(ip_hdr(skb)->tot_len) - ip_hdrlen(skb);
+
+ skb_get(skb);
+ skb_chk = skb_checksum_trimmed(skb, transport_len,
+ ip_mc_validate_checksum);
+ if (!skb_chk)
+ return -EINVAL;
+
+ if (!pskb_may_pull(skb_chk, len)) {
+ kfree_skb(skb_chk);
+ return -EINVAL;
+ }
+
+ ret = ip_mc_check_igmp_msg(skb_chk);
+ if (ret) {
+ kfree_skb(skb_chk);
+ return ret;
+ }
+
+ if (skb_trimmed)
+ *skb_trimmed = skb_chk;
+ else
+ kfree_skb(skb_chk);
+
+ return 0;
+}
+
+/**
+ * ip_mc_check_igmp - checks whether this is a sane IGMP packet
+ * @skb: the skb to validate
+ * @skb_trimmed: to store an skb pointer trimmed to IPv4 packet tail (optional)
+ *
+ * Checks whether an IPv4 packet is a valid IGMP packet. If so sets
+ * skb network and transport headers accordingly and returns zero.
+ *
+ * -EINVAL: A broken packet was detected, i.e. it violates some internet
+ * standard
+ * -ENOMSG: IP header validation succeeded but it is not an IGMP packet.
+ * -ENOMEM: A memory allocation failure happened.
+ *
+ * Optionally, an skb pointer might be provided via skb_trimmed (or set it
+ * to NULL): After parsing an IGMP packet successfully it will point to
+ * an skb which has its tail aligned to the IP packet end. This might
+ * either be the originally provided skb or a trimmed, cloned version if
+ * the skb frame had data beyond the IP packet. A cloned skb allows us
+ * to leave the original skb and its full frame unchanged (which might be
+ * desirable for layer 2 frame jugglers).
+ *
+ * The caller needs to release a reference count from any returned skb_trimmed.
+ */
+int ip_mc_check_igmp(struct sk_buff *skb, struct sk_buff **skb_trimmed)
+{
+ int ret = ip_mc_check_iphdr(skb);
+
+ if (ret < 0)
+ return ret;
+
+ if (ip_hdr(skb)->protocol != IPPROTO_IGMP)
+ return -ENOMSG;
+
+ return __ip_mc_check_igmp(skb, skb_trimmed);
+}
+EXPORT_SYMBOL(ip_mc_check_igmp);
+
/*
* Resend IGMP JOIN report; used by netdev notifier.
*/
diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c
index 8976ca423a07..60021d0d9326 100644
--- a/net/ipv4/inet_connection_sock.c
+++ b/net/ipv4/inet_connection_sock.c
@@ -99,6 +99,7 @@ int inet_csk_get_port(struct sock *sk, unsigned short snum)
struct net *net = sock_net(sk);
int smallest_size = -1, smallest_rover;
kuid_t uid = sock_i_uid(sk);
+ int attempt_half = (sk->sk_reuse == SK_CAN_REUSE) ? 1 : 0;
local_bh_disable();
if (!snum) {
@@ -106,6 +107,14 @@ int inet_csk_get_port(struct sock *sk, unsigned short snum)
again:
inet_get_local_port_range(net, &low, &high);
+ if (attempt_half) {
+ int half = low + ((high - low) >> 1);
+
+ if (attempt_half == 1)
+ high = half;
+ else
+ low = half;
+ }
remaining = (high - low) + 1;
smallest_rover = rover = prandom_u32() % remaining + low;
@@ -127,11 +136,6 @@ again:
(tb->num_owners < smallest_size || smallest_size == -1)) {
smallest_size = tb->num_owners;
smallest_rover = rover;
- if (atomic_read(&hashinfo->bsockets) > (high - low) + 1 &&
- !inet_csk(sk)->icsk_af_ops->bind_conflict(sk, tb, false)) {
- snum = smallest_rover;
- goto tb_found;
- }
}
if (!inet_csk(sk)->icsk_af_ops->bind_conflict(sk, tb, false)) {
snum = rover;
@@ -159,6 +163,11 @@ again:
snum = smallest_rover;
goto have_snum;
}
+ if (attempt_half == 1) {
+ /* OK we now try the upper half of the range */
+ attempt_half = 2;
+ goto again;
+ }
goto fail;
}
/* OK, here is the one we will use. HEAD is
diff --git a/net/ipv4/inet_diag.c b/net/ipv4/inet_diag.c
index bb77ebdae3b3..4d32262c7502 100644
--- a/net/ipv4/inet_diag.c
+++ b/net/ipv4/inet_diag.c
@@ -224,14 +224,16 @@ int inet_sk_diag_fill(struct sock *sk, struct inet_connection_sock *icsk,
handler->idiag_get_info(sk, r, info);
if (sk->sk_state < TCP_TIME_WAIT) {
- int err = 0;
+ union tcp_cc_info info;
+ size_t sz = 0;
+ int attr;
rcu_read_lock();
ca_ops = READ_ONCE(icsk->icsk_ca_ops);
if (ca_ops && ca_ops->get_info)
- err = ca_ops->get_info(sk, ext, skb);
+ sz = ca_ops->get_info(sk, ext, &attr, &info);
rcu_read_unlock();
- if (err < 0)
+ if (sz && nla_put(skb, attr, sz, &info) < 0)
goto errout;
}
diff --git a/net/ipv4/inet_hashtables.c b/net/ipv4/inet_hashtables.c
index c6fb80bd5826..3766bddb3e8a 100644
--- a/net/ipv4/inet_hashtables.c
+++ b/net/ipv4/inet_hashtables.c
@@ -90,10 +90,6 @@ void inet_bind_bucket_destroy(struct kmem_cache *cachep, struct inet_bind_bucket
void inet_bind_hash(struct sock *sk, struct inet_bind_bucket *tb,
const unsigned short snum)
{
- struct inet_hashinfo *hashinfo = sk->sk_prot->h.hashinfo;
-
- atomic_inc(&hashinfo->bsockets);
-
inet_sk(sk)->inet_num = snum;
sk_add_bind_node(sk, &tb->owners);
tb->num_owners++;
@@ -111,8 +107,6 @@ static void __inet_put_port(struct sock *sk)
struct inet_bind_hashbucket *head = &hashinfo->bhash[bhash];
struct inet_bind_bucket *tb;
- atomic_dec(&hashinfo->bsockets);
-
spin_lock(&head->lock);
tb = inet_csk(sk)->icsk_bind_hash;
__sk_del_bind_node(sk);
@@ -608,7 +602,6 @@ void inet_hashinfo_init(struct inet_hashinfo *h)
{
int i;
- atomic_set(&h->bsockets, 0);
for (i = 0; i < INET_LHTABLE_SIZE; i++) {
spin_lock_init(&h->listening_hash[i].lock);
INIT_HLIST_NULLS_HEAD(&h->listening_hash[i].head,
diff --git a/net/ipv4/inet_timewait_sock.c b/net/ipv4/inet_timewait_sock.c
index 00ec8d5d7e7e..2ffbd16b79e0 100644
--- a/net/ipv4/inet_timewait_sock.c
+++ b/net/ipv4/inet_timewait_sock.c
@@ -170,7 +170,7 @@ void __inet_twsk_hashdance(struct inet_timewait_sock *tw, struct sock *sk,
}
EXPORT_SYMBOL_GPL(__inet_twsk_hashdance);
-void tw_timer_handler(unsigned long data)
+static void tw_timer_handler(unsigned long data)
{
struct inet_timewait_sock *tw = (struct inet_timewait_sock *)data;
diff --git a/net/ipv4/ip_fragment.c b/net/ipv4/ip_fragment.c
index cc1da6d9cb35..47fa64ee82b1 100644
--- a/net/ipv4/ip_fragment.c
+++ b/net/ipv4/ip_fragment.c
@@ -173,6 +173,15 @@ static void ipq_kill(struct ipq *ipq)
inet_frag_kill(&ipq->q, &ip4_frags);
}
+static bool frag_expire_skip_icmp(u32 user)
+{
+ return user == IP_DEFRAG_AF_PACKET ||
+ ip_defrag_user_in_between(user, IP_DEFRAG_CONNTRACK_IN,
+ __IP_DEFRAG_CONNTRACK_IN_END) ||
+ ip_defrag_user_in_between(user, IP_DEFRAG_CONNTRACK_BRIDGE_IN,
+ __IP_DEFRAG_CONNTRACK_BRIDGE_IN);
+}
+
/*
* Oops, a fragment queue timed out. Kill it and send an ICMP reply.
*/
@@ -217,10 +226,8 @@ static void ip_expire(unsigned long arg)
/* Only an end host needs to send an ICMP
* "Fragment Reassembly Timeout" message, per RFC792.
*/
- if (qp->user == IP_DEFRAG_AF_PACKET ||
- ((qp->user >= IP_DEFRAG_CONNTRACK_IN) &&
- (qp->user <= __IP_DEFRAG_CONNTRACK_IN_END) &&
- (skb_rtable(head)->rt_type != RTN_LOCAL)))
+ if (frag_expire_skip_icmp(qp->user) &&
+ (skb_rtable(head)->rt_type != RTN_LOCAL))
goto out_rcu_unlock;
/* Send an ICMP "Fragment Reassembly Timeout" message. */
diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c
index c65b93a7b711..8d91b922fcfe 100644
--- a/net/ipv4/ip_output.c
+++ b/net/ipv4/ip_output.c
@@ -83,6 +83,9 @@
int sysctl_ip_default_ttl __read_mostly = IPDEFTTL;
EXPORT_SYMBOL(sysctl_ip_default_ttl);
+static int ip_fragment(struct sock *sk, struct sk_buff *skb,
+ int (*output)(struct sock *, struct sk_buff *));
+
/* Generate a checksum for an outgoing IP datagram. */
void ip_send_check(struct iphdr *iph)
{
@@ -91,7 +94,7 @@ void ip_send_check(struct iphdr *iph)
}
EXPORT_SYMBOL(ip_send_check);
-int __ip_local_out_sk(struct sock *sk, struct sk_buff *skb)
+static int __ip_local_out_sk(struct sock *sk, struct sk_buff *skb)
{
struct iphdr *iph = ip_hdr(skb);
@@ -478,6 +481,28 @@ static void ip_copy_metadata(struct sk_buff *to, struct sk_buff *from)
skb_copy_secmark(to, from);
}
+static int ip_fragment(struct sock *sk, struct sk_buff *skb,
+ int (*output)(struct sock *, struct sk_buff *))
+{
+ struct iphdr *iph = ip_hdr(skb);
+ unsigned int mtu = ip_skb_dst_mtu(skb);
+
+ if (unlikely(((iph->frag_off & htons(IP_DF)) && !skb->ignore_df) ||
+ (IPCB(skb)->frag_max_size &&
+ IPCB(skb)->frag_max_size > mtu))) {
+ struct rtable *rt = skb_rtable(skb);
+ struct net_device *dev = rt->dst.dev;
+
+ IP_INC_STATS(dev_net(dev), IPSTATS_MIB_FRAGFAILS);
+ icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED,
+ htonl(mtu));
+ kfree_skb(skb);
+ return -EMSGSIZE;
+ }
+
+ return ip_do_fragment(sk, skb, output);
+}
+
/*
* This IP datagram is too large to be sent in one piece. Break it up into
* smaller pieces (each of size equal to IP header plus
@@ -485,8 +510,8 @@ static void ip_copy_metadata(struct sk_buff *to, struct sk_buff *from)
* single device frame, and queue such a frame for sending.
*/
-int ip_fragment(struct sock *sk, struct sk_buff *skb,
- int (*output)(struct sock *, struct sk_buff *))
+int ip_do_fragment(struct sock *sk, struct sk_buff *skb,
+ int (*output)(struct sock *, struct sk_buff *))
{
struct iphdr *iph;
int ptr;
@@ -507,15 +532,6 @@ int ip_fragment(struct sock *sk, struct sk_buff *skb,
iph = ip_hdr(skb);
mtu = ip_skb_dst_mtu(skb);
- if (unlikely(((iph->frag_off & htons(IP_DF)) && !skb->ignore_df) ||
- (IPCB(skb)->frag_max_size &&
- IPCB(skb)->frag_max_size > mtu))) {
- IP_INC_STATS(dev_net(dev), IPSTATS_MIB_FRAGFAILS);
- icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED,
- htonl(mtu));
- kfree_skb(skb);
- return -EMSGSIZE;
- }
/*
* Setup starting values.
@@ -751,7 +767,7 @@ fail:
IP_INC_STATS(dev_net(dev), IPSTATS_MIB_FRAGFAILS);
return err;
}
-EXPORT_SYMBOL(ip_fragment);
+EXPORT_SYMBOL(ip_do_fragment);
int
ip_generic_getfrag(void *from, char *to, int offset, int len, int odd, struct sk_buff *skb)
diff --git a/net/ipv4/ip_tunnel_core.c b/net/ipv4/ip_tunnel_core.c
index ce63ab21b6cd..6a51a71a6c67 100644
--- a/net/ipv4/ip_tunnel_core.c
+++ b/net/ipv4/ip_tunnel_core.c
@@ -98,7 +98,7 @@ int iptunnel_pull_header(struct sk_buff *skb, int hdr_len, __be16 inner_proto)
return -ENOMEM;
eh = (struct ethhdr *)skb->data;
- if (likely(ntohs(eh->h_proto) >= ETH_P_802_3_MIN))
+ if (likely(eth_proto_is_802_3(eh->h_proto)))
skb->protocol = eh->h_proto;
else
skb->protocol = htons(ETH_P_802_2);
@@ -165,6 +165,8 @@ struct rtnl_link_stats64 *ip_tunnel_get_stats64(struct net_device *dev,
{
int i;
+ netdev_stats_to_stats64(tot, &dev->stats);
+
for_each_possible_cpu(i) {
const struct pcpu_sw_netstats *tstats =
per_cpu_ptr(dev->tstats, i);
@@ -185,22 +187,6 @@ struct rtnl_link_stats64 *ip_tunnel_get_stats64(struct net_device *dev,
tot->tx_bytes += tx_bytes;
}
- tot->multicast = dev->stats.multicast;
-
- tot->rx_crc_errors = dev->stats.rx_crc_errors;
- tot->rx_fifo_errors = dev->stats.rx_fifo_errors;
- tot->rx_length_errors = dev->stats.rx_length_errors;
- tot->rx_frame_errors = dev->stats.rx_frame_errors;
- tot->rx_errors = dev->stats.rx_errors;
-
- tot->tx_fifo_errors = dev->stats.tx_fifo_errors;
- tot->tx_carrier_errors = dev->stats.tx_carrier_errors;
- tot->tx_dropped = dev->stats.tx_dropped;
- tot->tx_aborted_errors = dev->stats.tx_aborted_errors;
- tot->tx_errors = dev->stats.tx_errors;
-
- tot->collisions = dev->stats.collisions;
-
return tot;
}
EXPORT_SYMBOL_GPL(ip_tunnel_get_stats64);
diff --git a/net/ipv4/ipip.c b/net/ipv4/ipip.c
index ff96396ebec5..254238daf58b 100644
--- a/net/ipv4/ipip.c
+++ b/net/ipv4/ipip.c
@@ -251,7 +251,8 @@ ipip_tunnel_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
return -EINVAL;
}
- p.i_key = p.o_key = p.i_flags = p.o_flags = 0;
+ p.i_key = p.o_key = 0;
+ p.i_flags = p.o_flags = 0;
if (p.iph.ttl)
p.iph.frag_off |= htons(IP_DF);
diff --git a/net/ipv4/netfilter/arp_tables.c b/net/ipv4/netfilter/arp_tables.c
index 13bfe84bf3ca..a61200754f4b 100644
--- a/net/ipv4/netfilter/arp_tables.c
+++ b/net/ipv4/netfilter/arp_tables.c
@@ -1075,6 +1075,9 @@ static int do_replace(struct net *net, const void __user *user,
/* overflow check */
if (tmp.num_counters >= INT_MAX / sizeof(struct xt_counters))
return -ENOMEM;
+ if (tmp.num_counters == 0)
+ return -EINVAL;
+
tmp.name[sizeof(tmp.name)-1] = 0;
newinfo = xt_alloc_table_info(tmp.size);
@@ -1499,6 +1502,9 @@ static int compat_do_replace(struct net *net, void __user *user,
return -ENOMEM;
if (tmp.num_counters >= INT_MAX / sizeof(struct xt_counters))
return -ENOMEM;
+ if (tmp.num_counters == 0)
+ return -EINVAL;
+
tmp.name[sizeof(tmp.name)-1] = 0;
newinfo = xt_alloc_table_info(tmp.size);
diff --git a/net/ipv4/netfilter/ip_tables.c b/net/ipv4/netfilter/ip_tables.c
index c69db7fa25ee..2d0e265fef6e 100644
--- a/net/ipv4/netfilter/ip_tables.c
+++ b/net/ipv4/netfilter/ip_tables.c
@@ -1262,6 +1262,9 @@ do_replace(struct net *net, const void __user *user, unsigned int len)
/* overflow check */
if (tmp.num_counters >= INT_MAX / sizeof(struct xt_counters))
return -ENOMEM;
+ if (tmp.num_counters == 0)
+ return -EINVAL;
+
tmp.name[sizeof(tmp.name)-1] = 0;
newinfo = xt_alloc_table_info(tmp.size);
@@ -1809,6 +1812,9 @@ compat_do_replace(struct net *net, void __user *user, unsigned int len)
return -ENOMEM;
if (tmp.num_counters >= INT_MAX / sizeof(struct xt_counters))
return -ENOMEM;
+ if (tmp.num_counters == 0)
+ return -EINVAL;
+
tmp.name[sizeof(tmp.name)-1] = 0;
newinfo = xt_alloc_table_info(tmp.size);
diff --git a/net/ipv4/netfilter/ipt_CLUSTERIP.c b/net/ipv4/netfilter/ipt_CLUSTERIP.c
index 771ab3d01ad3..45cb16a6a4a3 100644
--- a/net/ipv4/netfilter/ipt_CLUSTERIP.c
+++ b/net/ipv4/netfilter/ipt_CLUSTERIP.c
@@ -367,6 +367,11 @@ static int clusterip_tg_check(const struct xt_tgchk_param *par)
struct clusterip_config *config;
int ret;
+ if (par->nft_compat) {
+ pr_err("cannot use CLUSTERIP target from nftables compat\n");
+ return -EOPNOTSUPP;
+ }
+
if (cipinfo->hash_mode != CLUSTERIP_HASHMODE_SIP &&
cipinfo->hash_mode != CLUSTERIP_HASHMODE_SIP_SPT &&
cipinfo->hash_mode != CLUSTERIP_HASHMODE_SIP_SPT_DPT) {
diff --git a/net/ipv4/netfilter/ipt_SYNPROXY.c b/net/ipv4/netfilter/ipt_SYNPROXY.c
index e9e67793055f..fe8cc183411e 100644
--- a/net/ipv4/netfilter/ipt_SYNPROXY.c
+++ b/net/ipv4/netfilter/ipt_SYNPROXY.c
@@ -18,7 +18,7 @@
#include <net/netfilter/nf_conntrack_synproxy.h>
static struct iphdr *
-synproxy_build_ip(struct sk_buff *skb, u32 saddr, u32 daddr)
+synproxy_build_ip(struct sk_buff *skb, __be32 saddr, __be32 daddr)
{
struct iphdr *iph;
@@ -220,7 +220,7 @@ synproxy_send_client_ack(const struct synproxy_net *snet,
nth->ack_seq = th->ack_seq;
tcp_flag_word(nth) = TCP_FLAG_ACK;
nth->doff = tcp_hdr_size / 4;
- nth->window = ntohs(htons(th->window) >> opts->wscale);
+ nth->window = htons(ntohs(th->window) >> opts->wscale);
nth->check = 0;
nth->urg_ptr = 0;
diff --git a/net/ipv4/ping.c b/net/ipv4/ping.c
index a93f260cf24c..05ff44b758df 100644
--- a/net/ipv4/ping.c
+++ b/net/ipv4/ping.c
@@ -158,6 +158,7 @@ void ping_unhash(struct sock *sk)
if (sk_hashed(sk)) {
write_lock_bh(&ping_table.lock);
hlist_nulls_del(&sk->sk_nulls_node);
+ sk_nulls_node_init(&sk->sk_nulls_node);
sock_put(sk);
isk->inet_num = 0;
isk->inet_sport = 0;
diff --git a/net/ipv4/proc.c b/net/ipv4/proc.c
index e1f3b911dd1e..da5d483e236a 100644
--- a/net/ipv4/proc.c
+++ b/net/ipv4/proc.c
@@ -298,6 +298,8 @@ static const struct snmp_mib snmp4_net_list[] = {
SNMP_MIB_ITEM("TCPACKSkippedFinWait2", LINUX_MIB_TCPACKSKIPPEDFINWAIT2),
SNMP_MIB_ITEM("TCPACKSkippedTimeWait", LINUX_MIB_TCPACKSKIPPEDTIMEWAIT),
SNMP_MIB_ITEM("TCPACKSkippedChallenge", LINUX_MIB_TCPACKSKIPPEDCHALLENGE),
+ SNMP_MIB_ITEM("TCPWinProbe", LINUX_MIB_TCPWINPROBE),
+ SNMP_MIB_ITEM("TCPKeepAlive", LINUX_MIB_TCPKEEPALIVE),
SNMP_MIB_SENTINEL
};
diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index a78540f28276..f6055984c307 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -457,12 +457,9 @@ static struct neighbour *ipv4_neigh_lookup(const struct dst_entry *dst,
}
#define IP_IDENTS_SZ 2048u
-struct ip_ident_bucket {
- atomic_t id;
- u32 stamp32;
-};
-static struct ip_ident_bucket *ip_idents __read_mostly;
+static atomic_t *ip_idents __read_mostly;
+static u32 *ip_tstamps __read_mostly;
/* In order to protect privacy, we add a perturbation to identifiers
* if one generator is seldom used. This makes hard for an attacker
@@ -470,15 +467,16 @@ static struct ip_ident_bucket *ip_idents __read_mostly;
*/
u32 ip_idents_reserve(u32 hash, int segs)
{
- struct ip_ident_bucket *bucket = ip_idents + hash % IP_IDENTS_SZ;
- u32 old = ACCESS_ONCE(bucket->stamp32);
+ u32 *p_tstamp = ip_tstamps + hash % IP_IDENTS_SZ;
+ atomic_t *p_id = ip_idents + hash % IP_IDENTS_SZ;
+ u32 old = ACCESS_ONCE(*p_tstamp);
u32 now = (u32)jiffies;
u32 delta = 0;
- if (old != now && cmpxchg(&bucket->stamp32, old, now) == old)
+ if (old != now && cmpxchg(p_tstamp, old, now) == old)
delta = prandom_u32_max(now - old);
- return atomic_add_return(segs + delta, &bucket->id) - segs;
+ return atomic_add_return(segs + delta, p_id) - segs;
}
EXPORT_SYMBOL(ip_idents_reserve);
@@ -902,6 +900,10 @@ static int ip_error(struct sk_buff *skb)
bool send;
int code;
+ /* IP on this device is disabled. */
+ if (!in_dev)
+ goto out;
+
net = dev_net(rt->dst.dev);
if (!IN_DEV_FORWARD(in_dev)) {
switch (rt->dst.error) {
@@ -962,10 +964,7 @@ static void __ip_rt_update_pmtu(struct rtable *rt, struct flowi4 *fl4, u32 mtu)
if (dst_metric_locked(dst, RTAX_MTU))
return;
- if (dst->dev->mtu < mtu)
- return;
-
- if (rt->rt_pmtu && rt->rt_pmtu < mtu)
+ if (ipv4_mtu(dst) < mtu)
return;
if (mtu < ip_rt_min_pmtu)
@@ -2096,7 +2095,8 @@ struct rtable *__ip_route_output_key(struct net *net, struct flowi4 *fl4)
goto out;
}
if (ipv4_is_local_multicast(fl4->daddr) ||
- ipv4_is_lbcast(fl4->daddr)) {
+ ipv4_is_lbcast(fl4->daddr) ||
+ fl4->flowi4_proto == IPPROTO_IGMP) {
if (!fl4->saddr)
fl4->saddr = inet_select_addr(dev_out, 0,
RT_SCOPE_LINK);
@@ -2741,6 +2741,10 @@ int __init ip_rt_init(void)
prandom_bytes(ip_idents, IP_IDENTS_SZ * sizeof(*ip_idents));
+ ip_tstamps = kcalloc(IP_IDENTS_SZ, sizeof(*ip_tstamps), GFP_KERNEL);
+ if (!ip_tstamps)
+ panic("IP: failed to allocate ip_tstamps\n");
+
for_each_possible_cpu(cpu) {
struct uncached_list *ul = &per_cpu(rt_uncached_list, cpu);
diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c
index c3852a7ff3c7..841de32f1fee 100644
--- a/net/ipv4/sysctl_net_ipv4.c
+++ b/net/ipv4/sysctl_net_ipv4.c
@@ -821,6 +821,13 @@ static struct ctl_table ipv4_net_table[] = {
.proc_handler = proc_dointvec
},
{
+ .procname = "tcp_ecn_fallback",
+ .data = &init_net.ipv4.sysctl_tcp_ecn_fallback,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec
+ },
+ {
.procname = "ip_local_port_range",
.maxlen = sizeof(init_net.ipv4.ip_local_ports.range),
.data = &init_net.ipv4.ip_local_ports.range,
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index 8c5cd9efebbc..90afcec3f427 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -252,6 +252,7 @@
#include <linux/types.h>
#include <linux/fcntl.h>
#include <linux/poll.h>
+#include <linux/inet_diag.h>
#include <linux/init.h>
#include <linux/fs.h>
#include <linux/skbuff.h>
@@ -401,6 +402,7 @@ void tcp_init_sock(struct sock *sk)
tp->snd_ssthresh = TCP_INFINITE_SSTHRESH;
tp->snd_cwnd_clamp = ~0;
tp->mss_cache = TCP_MSS_DEFAULT;
+ u64_stats_init(&tp->syncp);
tp->reordering = sysctl_tcp_reordering;
tcp_enable_early_retrans(tp);
@@ -807,16 +809,28 @@ ssize_t tcp_splice_read(struct socket *sock, loff_t *ppos,
}
EXPORT_SYMBOL(tcp_splice_read);
-struct sk_buff *sk_stream_alloc_skb(struct sock *sk, int size, gfp_t gfp)
+struct sk_buff *sk_stream_alloc_skb(struct sock *sk, int size, gfp_t gfp,
+ bool force_schedule)
{
struct sk_buff *skb;
/* The TCP header must be at least 32-bit aligned. */
size = ALIGN(size, 4);
+ if (unlikely(tcp_under_memory_pressure(sk)))
+ sk_mem_reclaim_partial(sk);
+
skb = alloc_skb_fclone(size + sk->sk_prot->max_header, gfp);
- if (skb) {
- if (sk_wmem_schedule(sk, skb->truesize)) {
+ if (likely(skb)) {
+ bool mem_scheduled;
+
+ if (force_schedule) {
+ mem_scheduled = true;
+ sk_forced_mem_schedule(sk, skb->truesize);
+ } else {
+ mem_scheduled = sk_wmem_schedule(sk, skb->truesize);
+ }
+ if (likely(mem_scheduled)) {
skb_reserve(skb, sk->sk_prot->max_header);
/*
* Make sure that we have exactly size bytes
@@ -906,7 +920,8 @@ new_segment:
if (!sk_stream_memory_free(sk))
goto wait_for_sndbuf;
- skb = sk_stream_alloc_skb(sk, 0, sk->sk_allocation);
+ skb = sk_stream_alloc_skb(sk, 0, sk->sk_allocation,
+ skb_queue_empty(&sk->sk_write_queue));
if (!skb)
goto wait_for_memory;
@@ -985,6 +1000,9 @@ do_error:
if (copied)
goto out;
out_err:
+ /* make sure we wake any epoll edge trigger waiter */
+ if (unlikely(skb_queue_len(&sk->sk_write_queue) == 0 && err == -EAGAIN))
+ sk->sk_write_space(sk);
return sk_stream_error(sk, flags, err);
}
@@ -1142,7 +1160,8 @@ new_segment:
skb = sk_stream_alloc_skb(sk,
select_size(sk, sg),
- sk->sk_allocation);
+ sk->sk_allocation,
+ skb_queue_empty(&sk->sk_write_queue));
if (!skb)
goto wait_for_memory;
@@ -1273,6 +1292,9 @@ do_error:
goto out;
out_err:
err = sk_stream_error(sk, flags, err);
+ /* make sure we wake any epoll edge trigger waiter */
+ if (unlikely(skb_queue_len(&sk->sk_write_queue) == 0 && err == -EAGAIN))
+ sk->sk_write_space(sk);
release_sock(sk);
return err;
}
@@ -2481,6 +2503,13 @@ static int do_tcp_setsockopt(struct sock *sk, int level,
icsk->icsk_syn_retries = val;
break;
+ case TCP_SAVE_SYN:
+ if (val < 0 || val > 1)
+ err = -EINVAL;
+ else
+ tp->save_syn = val;
+ break;
+
case TCP_LINGER2:
if (val < 0)
tp->linger2 = -1;
@@ -2592,11 +2621,12 @@ EXPORT_SYMBOL(compat_tcp_setsockopt);
#endif
/* Return information about state of tcp endpoint in API format. */
-void tcp_get_info(const struct sock *sk, struct tcp_info *info)
+void tcp_get_info(struct sock *sk, struct tcp_info *info)
{
const struct tcp_sock *tp = tcp_sk(sk);
const struct inet_connection_sock *icsk = inet_csk(sk);
u32 now = tcp_time_stamp;
+ unsigned int start;
u32 rate;
memset(info, 0, sizeof(*info));
@@ -2663,6 +2693,14 @@ void tcp_get_info(const struct sock *sk, struct tcp_info *info)
rate = READ_ONCE(sk->sk_max_pacing_rate);
info->tcpi_max_pacing_rate = rate != ~0U ? rate : ~0ULL;
+
+ do {
+ start = u64_stats_fetch_begin_irq(&tp->syncp);
+ info->tcpi_bytes_acked = tp->bytes_acked;
+ info->tcpi_bytes_received = tp->bytes_received;
+ } while (u64_stats_fetch_retry_irq(&tp->syncp, start));
+ info->tcpi_segs_out = tp->segs_out;
+ info->tcpi_segs_in = tp->segs_in;
}
EXPORT_SYMBOL_GPL(tcp_get_info);
@@ -2734,6 +2772,26 @@ static int do_tcp_getsockopt(struct sock *sk, int level,
return -EFAULT;
return 0;
}
+ case TCP_CC_INFO: {
+ const struct tcp_congestion_ops *ca_ops;
+ union tcp_cc_info info;
+ size_t sz = 0;
+ int attr;
+
+ if (get_user(len, optlen))
+ return -EFAULT;
+
+ ca_ops = icsk->icsk_ca_ops;
+ if (ca_ops && ca_ops->get_info)
+ sz = ca_ops->get_info(sk, ~0U, &attr, &info);
+
+ len = min_t(unsigned int, len, sz);
+ if (put_user(len, optlen))
+ return -EFAULT;
+ if (copy_to_user(optval, &info, len))
+ return -EFAULT;
+ return 0;
+ }
case TCP_QUICKACK:
val = !icsk->icsk_ack.pingpong;
break;
@@ -2792,6 +2850,42 @@ static int do_tcp_getsockopt(struct sock *sk, int level,
case TCP_NOTSENT_LOWAT:
val = tp->notsent_lowat;
break;
+ case TCP_SAVE_SYN:
+ val = tp->save_syn;
+ break;
+ case TCP_SAVED_SYN: {
+ if (get_user(len, optlen))
+ return -EFAULT;
+
+ lock_sock(sk);
+ if (tp->saved_syn) {
+ if (len < tp->saved_syn[0]) {
+ if (put_user(tp->saved_syn[0], optlen)) {
+ release_sock(sk);
+ return -EFAULT;
+ }
+ release_sock(sk);
+ return -EINVAL;
+ }
+ len = tp->saved_syn[0];
+ if (put_user(len, optlen)) {
+ release_sock(sk);
+ return -EFAULT;
+ }
+ if (copy_to_user(optval, tp->saved_syn + 1, len)) {
+ release_sock(sk);
+ return -EFAULT;
+ }
+ tcp_saved_syn_free(tp);
+ release_sock(sk);
+ } else {
+ release_sock(sk);
+ len = 0;
+ if (put_user(len, optlen))
+ return -EFAULT;
+ }
+ return 0;
+ }
default:
return -ENOPROTOOPT;
}
@@ -2996,11 +3090,12 @@ __setup("thash_entries=", set_thash_entries);
static void __init tcp_init_mem(void)
{
- unsigned long limit = nr_free_buffer_pages() / 8;
+ unsigned long limit = nr_free_buffer_pages() / 16;
+
limit = max(limit, 128UL);
- sysctl_tcp_mem[0] = limit / 4 * 3;
- sysctl_tcp_mem[1] = limit;
- sysctl_tcp_mem[2] = sysctl_tcp_mem[0] * 2;
+ sysctl_tcp_mem[0] = limit / 4 * 3; /* 4.68 % */
+ sysctl_tcp_mem[1] = limit; /* 6.25 % */
+ sysctl_tcp_mem[2] = sysctl_tcp_mem[0] * 2; /* 9.37 % */
}
void __init tcp_init(void)
diff --git a/net/ipv4/tcp_dctcp.c b/net/ipv4/tcp_dctcp.c
index 4376016f7fa5..4c41c1287197 100644
--- a/net/ipv4/tcp_dctcp.c
+++ b/net/ipv4/tcp_dctcp.c
@@ -277,7 +277,8 @@ static void dctcp_cwnd_event(struct sock *sk, enum tcp_ca_event ev)
}
}
-static int dctcp_get_info(struct sock *sk, u32 ext, struct sk_buff *skb)
+static size_t dctcp_get_info(struct sock *sk, u32 ext, int *attr,
+ union tcp_cc_info *info)
{
const struct dctcp *ca = inet_csk_ca(sk);
@@ -286,18 +287,17 @@ static int dctcp_get_info(struct sock *sk, u32 ext, struct sk_buff *skb)
*/
if (ext & (1 << (INET_DIAG_DCTCPINFO - 1)) ||
ext & (1 << (INET_DIAG_VEGASINFO - 1))) {
- struct tcp_dctcp_info info;
-
- memset(&info, 0, sizeof(info));
+ memset(info, 0, sizeof(struct tcp_dctcp_info));
if (inet_csk(sk)->icsk_ca_ops != &dctcp_reno) {
- info.dctcp_enabled = 1;
- info.dctcp_ce_state = (u16) ca->ce_state;
- info.dctcp_alpha = ca->dctcp_alpha;
- info.dctcp_ab_ecn = ca->acked_bytes_ecn;
- info.dctcp_ab_tot = ca->acked_bytes_total;
+ info->dctcp.dctcp_enabled = 1;
+ info->dctcp.dctcp_ce_state = (u16) ca->ce_state;
+ info->dctcp.dctcp_alpha = ca->dctcp_alpha;
+ info->dctcp.dctcp_ab_ecn = ca->acked_bytes_ecn;
+ info->dctcp.dctcp_ab_tot = ca->acked_bytes_total;
}
- return nla_put(skb, INET_DIAG_DCTCPINFO, sizeof(info), &info);
+ *attr = INET_DIAG_DCTCPINFO;
+ return sizeof(*info);
}
return 0;
}
diff --git a/net/ipv4/tcp_fastopen.c b/net/ipv4/tcp_fastopen.c
index e3d87aca6be8..46b087a27503 100644
--- a/net/ipv4/tcp_fastopen.c
+++ b/net/ipv4/tcp_fastopen.c
@@ -206,6 +206,11 @@ static bool tcp_fastopen_create_child(struct sock *sk,
skb_set_owner_r(skb2, child);
__skb_queue_tail(&child->sk_receive_queue, skb2);
tp->syn_data_acked = 1;
+
+ /* u64_stats_update_begin(&tp->syncp) not needed here,
+ * as we certainly are not changing upper 32bit value (0)
+ */
+ tp->bytes_received = end_seq - TCP_SKB_CB(skb)->seq - 1;
} else {
end_seq = TCP_SKB_CB(skb)->seq + 1;
}
diff --git a/net/ipv4/tcp_illinois.c b/net/ipv4/tcp_illinois.c
index 67476f085e48..f71002e4db0b 100644
--- a/net/ipv4/tcp_illinois.c
+++ b/net/ipv4/tcp_illinois.c
@@ -300,24 +300,25 @@ static u32 tcp_illinois_ssthresh(struct sock *sk)
}
/* Extract info for Tcp socket info provided via netlink. */
-static int tcp_illinois_info(struct sock *sk, u32 ext, struct sk_buff *skb)
+static size_t tcp_illinois_info(struct sock *sk, u32 ext, int *attr,
+ union tcp_cc_info *info)
{
const struct illinois *ca = inet_csk_ca(sk);
if (ext & (1 << (INET_DIAG_VEGASINFO - 1))) {
- struct tcpvegas_info info = {
- .tcpv_enabled = 1,
- .tcpv_rttcnt = ca->cnt_rtt,
- .tcpv_minrtt = ca->base_rtt,
- };
+ info->vegas.tcpv_enabled = 1;
+ info->vegas.tcpv_rttcnt = ca->cnt_rtt;
+ info->vegas.tcpv_minrtt = ca->base_rtt;
+ info->vegas.tcpv_rtt = 0;
- if (info.tcpv_rttcnt > 0) {
+ if (info->vegas.tcpv_rttcnt > 0) {
u64 t = ca->sum_rtt;
- do_div(t, info.tcpv_rttcnt);
- info.tcpv_rtt = t;
+ do_div(t, info->vegas.tcpv_rttcnt);
+ info->vegas.tcpv_rtt = t;
}
- return nla_put(skb, INET_DIAG_VEGASINFO, sizeof(info), &info);
+ *attr = INET_DIAG_VEGASINFO;
+ return sizeof(struct tcpvegas_info);
}
return 0;
}
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index 3a4d9b34bed4..15c4536188a4 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -359,7 +359,7 @@ static void tcp_grow_window(struct sock *sk, const struct sk_buff *skb)
/* Check #1 */
if (tp->rcv_ssthresh < tp->window_clamp &&
(int)tp->rcv_ssthresh < tcp_space(sk) &&
- !sk_under_memory_pressure(sk)) {
+ !tcp_under_memory_pressure(sk)) {
int incr;
/* Check #2. Increase window, if skb with such overhead
@@ -446,7 +446,7 @@ static void tcp_clamp_window(struct sock *sk)
if (sk->sk_rcvbuf < sysctl_tcp_rmem[2] &&
!(sk->sk_userlocks & SOCK_RCVBUF_LOCK) &&
- !sk_under_memory_pressure(sk) &&
+ !tcp_under_memory_pressure(sk) &&
sk_memory_allocated(sk) < sk_prot_mem_limits(sk, 0)) {
sk->sk_rcvbuf = min(atomic_read(&sk->sk_rmem_alloc),
sysctl_tcp_rmem[2]);
@@ -1130,7 +1130,12 @@ static bool tcp_check_dsack(struct sock *sk, const struct sk_buff *ack_skb,
struct tcp_sacktag_state {
int reord;
int fack_count;
- long rtt_us; /* RTT measured by SACKing never-retransmitted data */
+ /* Timestamps for earliest and latest never-retransmitted segment
+ * that was SACKed. RTO needs the earliest RTT to stay conservative,
+ * but congestion control should still get an accurate delay signal.
+ */
+ struct skb_mstamp first_sackt;
+ struct skb_mstamp last_sackt;
int flag;
};
@@ -1233,14 +1238,9 @@ static u8 tcp_sacktag_one(struct sock *sk,
state->reord);
if (!after(end_seq, tp->high_seq))
state->flag |= FLAG_ORIG_SACK_ACKED;
- /* Pick the earliest sequence sacked for RTT */
- if (state->rtt_us < 0) {
- struct skb_mstamp now;
-
- skb_mstamp_get(&now);
- state->rtt_us = skb_mstamp_us_delta(&now,
- xmit_time);
- }
+ if (state->first_sackt.v64 == 0)
+ state->first_sackt = *xmit_time;
+ state->last_sackt = *xmit_time;
}
if (sacked & TCPCB_LOST) {
@@ -1634,7 +1634,7 @@ static int tcp_sack_cache_ok(const struct tcp_sock *tp, const struct tcp_sack_bl
static int
tcp_sacktag_write_queue(struct sock *sk, const struct sk_buff *ack_skb,
- u32 prior_snd_una, long *sack_rtt_us)
+ u32 prior_snd_una, struct tcp_sacktag_state *state)
{
struct tcp_sock *tp = tcp_sk(sk);
const unsigned char *ptr = (skb_transport_header(ack_skb) +
@@ -1642,7 +1642,6 @@ tcp_sacktag_write_queue(struct sock *sk, const struct sk_buff *ack_skb,
struct tcp_sack_block_wire *sp_wire = (struct tcp_sack_block_wire *)(ptr+2);
struct tcp_sack_block sp[TCP_NUM_SACKS];
struct tcp_sack_block *cache;
- struct tcp_sacktag_state state;
struct sk_buff *skb;
int num_sacks = min(TCP_NUM_SACKS, (ptr[1] - TCPOLEN_SACK_BASE) >> 3);
int used_sacks;
@@ -1650,9 +1649,8 @@ tcp_sacktag_write_queue(struct sock *sk, const struct sk_buff *ack_skb,
int i, j;
int first_sack_index;
- state.flag = 0;
- state.reord = tp->packets_out;
- state.rtt_us = -1L;
+ state->flag = 0;
+ state->reord = tp->packets_out;
if (!tp->sacked_out) {
if (WARN_ON(tp->fackets_out))
@@ -1663,7 +1661,7 @@ tcp_sacktag_write_queue(struct sock *sk, const struct sk_buff *ack_skb,
found_dup_sack = tcp_check_dsack(sk, ack_skb, sp_wire,
num_sacks, prior_snd_una);
if (found_dup_sack)
- state.flag |= FLAG_DSACKING_ACK;
+ state->flag |= FLAG_DSACKING_ACK;
/* Eliminate too old ACKs, but take into
* account more or less fresh ones, they can
@@ -1728,7 +1726,7 @@ tcp_sacktag_write_queue(struct sock *sk, const struct sk_buff *ack_skb,
}
skb = tcp_write_queue_head(sk);
- state.fack_count = 0;
+ state->fack_count = 0;
i = 0;
if (!tp->sacked_out) {
@@ -1762,10 +1760,10 @@ tcp_sacktag_write_queue(struct sock *sk, const struct sk_buff *ack_skb,
/* Head todo? */
if (before(start_seq, cache->start_seq)) {
- skb = tcp_sacktag_skip(skb, sk, &state,
+ skb = tcp_sacktag_skip(skb, sk, state,
start_seq);
skb = tcp_sacktag_walk(skb, sk, next_dup,
- &state,
+ state,
start_seq,
cache->start_seq,
dup_sack);
@@ -1776,7 +1774,7 @@ tcp_sacktag_write_queue(struct sock *sk, const struct sk_buff *ack_skb,
goto advance_sp;
skb = tcp_maybe_skipping_dsack(skb, sk, next_dup,
- &state,
+ state,
cache->end_seq);
/* ...tail remains todo... */
@@ -1785,12 +1783,12 @@ tcp_sacktag_write_queue(struct sock *sk, const struct sk_buff *ack_skb,
skb = tcp_highest_sack(sk);
if (!skb)
break;
- state.fack_count = tp->fackets_out;
+ state->fack_count = tp->fackets_out;
cache++;
goto walk;
}
- skb = tcp_sacktag_skip(skb, sk, &state, cache->end_seq);
+ skb = tcp_sacktag_skip(skb, sk, state, cache->end_seq);
/* Check overlap against next cached too (past this one already) */
cache++;
continue;
@@ -1800,12 +1798,12 @@ tcp_sacktag_write_queue(struct sock *sk, const struct sk_buff *ack_skb,
skb = tcp_highest_sack(sk);
if (!skb)
break;
- state.fack_count = tp->fackets_out;
+ state->fack_count = tp->fackets_out;
}
- skb = tcp_sacktag_skip(skb, sk, &state, start_seq);
+ skb = tcp_sacktag_skip(skb, sk, state, start_seq);
walk:
- skb = tcp_sacktag_walk(skb, sk, next_dup, &state,
+ skb = tcp_sacktag_walk(skb, sk, next_dup, state,
start_seq, end_seq, dup_sack);
advance_sp:
@@ -1820,14 +1818,12 @@ advance_sp:
for (j = 0; j < used_sacks; j++)
tp->recv_sack_cache[i++] = sp[j];
- tcp_mark_lost_retrans(sk);
-
- tcp_verify_left_out(tp);
-
- if ((state.reord < tp->fackets_out) &&
+ if ((state->reord < tp->fackets_out) &&
((inet_csk(sk)->icsk_ca_state != TCP_CA_Loss) || tp->undo_marker))
- tcp_update_reordering(sk, tp->fackets_out - state.reord, 0);
+ tcp_update_reordering(sk, tp->fackets_out - state->reord, 0);
+ tcp_mark_lost_retrans(sk);
+ tcp_verify_left_out(tp);
out:
#if FASTRETRANS_DEBUG > 0
@@ -1836,8 +1832,7 @@ out:
WARN_ON((int)tp->retrans_out < 0);
WARN_ON((int)tcp_packets_in_flight(tp) < 0);
#endif
- *sack_rtt_us = state.rtt_us;
- return state.flag;
+ return state->flag;
}
/* Limits sacked_out so that sum with lost_out isn't ever larger than
@@ -2700,16 +2695,21 @@ static void tcp_process_loss(struct sock *sk, int flag, bool is_dupack)
struct tcp_sock *tp = tcp_sk(sk);
bool recovered = !before(tp->snd_una, tp->high_seq);
+ if ((flag & FLAG_SND_UNA_ADVANCED) &&
+ tcp_try_undo_loss(sk, false))
+ return;
+
if (tp->frto) { /* F-RTO RFC5682 sec 3.1 (sack enhanced version). */
/* Step 3.b. A timeout is spurious if not all data are
* lost, i.e., never-retransmitted data are (s)acked.
*/
- if (tcp_try_undo_loss(sk, flag & FLAG_ORIG_SACK_ACKED))
+ if ((flag & FLAG_ORIG_SACK_ACKED) &&
+ tcp_try_undo_loss(sk, true))
return;
- if (after(tp->snd_nxt, tp->high_seq) &&
- (flag & FLAG_DATA_SACKED || is_dupack)) {
- tp->frto = 0; /* Loss was real: 2nd part of step 3.a */
+ if (after(tp->snd_nxt, tp->high_seq)) {
+ if (flag & FLAG_DATA_SACKED || is_dupack)
+ tp->frto = 0; /* Step 3.a. loss was real */
} else if (flag & FLAG_SND_UNA_ADVANCED && !recovered) {
tp->high_seq = tp->snd_nxt;
__tcp_push_pending_frames(sk, tcp_current_mss(sk),
@@ -2734,8 +2734,6 @@ static void tcp_process_loss(struct sock *sk, int flag, bool is_dupack)
else if (flag & FLAG_SND_UNA_ADVANCED)
tcp_reset_reno_sack(tp);
}
- if (tcp_try_undo_loss(sk, false))
- return;
tcp_xmit_retransmit_queue(sk);
}
@@ -3054,7 +3052,8 @@ static void tcp_ack_tstamp(struct sock *sk, struct sk_buff *skb,
* arrived at the other end.
*/
static int tcp_clean_rtx_queue(struct sock *sk, int prior_fackets,
- u32 prior_snd_una, long sack_rtt_us)
+ u32 prior_snd_una,
+ struct tcp_sacktag_state *sack)
{
const struct inet_connection_sock *icsk = inet_csk(sk);
struct skb_mstamp first_ackt, last_ackt, now;
@@ -3062,8 +3061,9 @@ static int tcp_clean_rtx_queue(struct sock *sk, int prior_fackets,
u32 prior_sacked = tp->sacked_out;
u32 reord = tp->packets_out;
bool fully_acked = true;
- long ca_seq_rtt_us = -1L;
+ long sack_rtt_us = -1L;
long seq_rtt_us = -1L;
+ long ca_rtt_us = -1L;
struct sk_buff *skb;
u32 pkts_acked = 0;
bool rtt_update;
@@ -3152,15 +3152,16 @@ static int tcp_clean_rtx_queue(struct sock *sk, int prior_fackets,
skb_mstamp_get(&now);
if (likely(first_ackt.v64)) {
seq_rtt_us = skb_mstamp_us_delta(&now, &first_ackt);
- ca_seq_rtt_us = skb_mstamp_us_delta(&now, &last_ackt);
+ ca_rtt_us = skb_mstamp_us_delta(&now, &last_ackt);
+ }
+ if (sack->first_sackt.v64) {
+ sack_rtt_us = skb_mstamp_us_delta(&now, &sack->first_sackt);
+ ca_rtt_us = skb_mstamp_us_delta(&now, &sack->last_sackt);
}
rtt_update = tcp_ack_update_rtt(sk, flag, seq_rtt_us, sack_rtt_us);
if (flag & FLAG_ACKED) {
- const struct tcp_congestion_ops *ca_ops
- = inet_csk(sk)->icsk_ca_ops;
-
tcp_rearm_rto(sk);
if (unlikely(icsk->icsk_mtup.probe_size &&
!after(tp->mtu_probe.probe_seq_end, tp->snd_una))) {
@@ -3183,11 +3184,6 @@ static int tcp_clean_rtx_queue(struct sock *sk, int prior_fackets,
tp->fackets_out -= min(pkts_acked, tp->fackets_out);
- if (ca_ops->pkts_acked) {
- long rtt_us = min_t(ulong, ca_seq_rtt_us, sack_rtt_us);
- ca_ops->pkts_acked(sk, pkts_acked, rtt_us);
- }
-
} else if (skb && rtt_update && sack_rtt_us >= 0 &&
sack_rtt_us > skb_mstamp_us_delta(&now, &skb->skb_mstamp)) {
/* Do not re-arm RTO if the sack RTT is measured from data sent
@@ -3197,6 +3193,9 @@ static int tcp_clean_rtx_queue(struct sock *sk, int prior_fackets,
tcp_rearm_rto(sk);
}
+ if (icsk->icsk_ca_ops->pkts_acked)
+ icsk->icsk_ca_ops->pkts_acked(sk, pkts_acked, ca_rtt_us);
+
#if FASTRETRANS_DEBUG > 0
WARN_ON((int)tp->sacked_out < 0);
WARN_ON((int)tp->lost_out < 0);
@@ -3237,7 +3236,7 @@ static void tcp_ack_probe(struct sock *sk)
* This function is not for random using!
*/
} else {
- unsigned long when = inet_csk_rto_backoff(icsk, TCP_RTO_MAX);
+ unsigned long when = tcp_probe0_when(sk, TCP_RTO_MAX);
inet_csk_reset_xmit_timer(sk, ICSK_TIME_PROBE0,
when, TCP_RTO_MAX);
@@ -3280,6 +3279,28 @@ static inline bool tcp_may_update_window(const struct tcp_sock *tp,
(ack_seq == tp->snd_wl1 && nwin > tp->snd_wnd);
}
+/* If we update tp->snd_una, also update tp->bytes_acked */
+static void tcp_snd_una_update(struct tcp_sock *tp, u32 ack)
+{
+ u32 delta = ack - tp->snd_una;
+
+ u64_stats_update_begin(&tp->syncp);
+ tp->bytes_acked += delta;
+ u64_stats_update_end(&tp->syncp);
+ tp->snd_una = ack;
+}
+
+/* If we update tp->rcv_nxt, also update tp->bytes_received */
+static void tcp_rcv_nxt_update(struct tcp_sock *tp, u32 seq)
+{
+ u32 delta = seq - tp->rcv_nxt;
+
+ u64_stats_update_begin(&tp->syncp);
+ tp->bytes_received += delta;
+ u64_stats_update_end(&tp->syncp);
+ tp->rcv_nxt = seq;
+}
+
/* Update our send window.
*
* Window update algorithm, described in RFC793/RFC1122 (used in linux-2.2
@@ -3315,7 +3336,7 @@ static int tcp_ack_update_window(struct sock *sk, const struct sk_buff *skb, u32
}
}
- tp->snd_una = ack;
+ tcp_snd_una_update(tp, ack);
return flag;
}
@@ -3443,6 +3464,7 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag)
{
struct inet_connection_sock *icsk = inet_csk(sk);
struct tcp_sock *tp = tcp_sk(sk);
+ struct tcp_sacktag_state sack_state;
u32 prior_snd_una = tp->snd_una;
u32 ack_seq = TCP_SKB_CB(skb)->seq;
u32 ack = TCP_SKB_CB(skb)->ack_seq;
@@ -3451,7 +3473,8 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag)
int prior_packets = tp->packets_out;
const int prior_unsacked = tp->packets_out - tp->sacked_out;
int acked = 0; /* Number of packets newly acked */
- long sack_rtt_us = -1L;
+
+ sack_state.first_sackt.v64 = 0;
/* We very likely will need to access write queue head. */
prefetchw(sk->sk_write_queue.next);
@@ -3497,7 +3520,7 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag)
* Note, we use the fact that SND.UNA>=SND.WL2.
*/
tcp_update_wl(tp, ack_seq);
- tp->snd_una = ack;
+ tcp_snd_una_update(tp, ack);
flag |= FLAG_WIN_UPDATE;
tcp_in_ack_event(sk, CA_ACK_WIN_UPDATE);
@@ -3515,7 +3538,7 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag)
if (TCP_SKB_CB(skb)->sacked)
flag |= tcp_sacktag_write_queue(sk, skb, prior_snd_una,
- &sack_rtt_us);
+ &sack_state);
if (tcp_ecn_rcv_ecn_echo(tp, tcp_hdr(skb))) {
flag |= FLAG_ECE;
@@ -3540,7 +3563,7 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag)
/* See if we can take anything off of the retransmit queue. */
acked = tp->packets_out;
flag |= tcp_clean_rtx_queue(sk, prior_fackets, prior_snd_una,
- sack_rtt_us);
+ &sack_state);
acked -= tp->packets_out;
/* Advance cwnd if state allows */
@@ -3592,7 +3615,7 @@ old_ack:
*/
if (TCP_SKB_CB(skb)->sacked) {
flag |= tcp_sacktag_write_queue(sk, skb, prior_snd_una,
- &sack_rtt_us);
+ &sack_state);
tcp_fastretrans_alert(sk, acked, prior_unsacked,
is_dupack, flag);
}
@@ -4236,7 +4259,7 @@ static void tcp_ofo_queue(struct sock *sk)
tail = skb_peek_tail(&sk->sk_receive_queue);
eaten = tail && tcp_try_coalesce(sk, tail, skb, &fragstolen);
- tp->rcv_nxt = TCP_SKB_CB(skb)->end_seq;
+ tcp_rcv_nxt_update(tp, TCP_SKB_CB(skb)->end_seq);
if (!eaten)
__skb_queue_tail(&sk->sk_receive_queue, skb);
if (TCP_SKB_CB(skb)->tcp_flags & TCPHDR_FIN)
@@ -4404,7 +4427,7 @@ static int __must_check tcp_queue_rcv(struct sock *sk, struct sk_buff *skb, int
__skb_pull(skb, hdrlen);
eaten = (tail &&
tcp_try_coalesce(sk, tail, skb, fragstolen)) ? 1 : 0;
- tcp_sk(sk)->rcv_nxt = TCP_SKB_CB(skb)->end_seq;
+ tcp_rcv_nxt_update(tcp_sk(sk), TCP_SKB_CB(skb)->end_seq);
if (!eaten) {
__skb_queue_tail(&sk->sk_receive_queue, skb);
skb_set_owner_r(skb, sk);
@@ -4491,13 +4514,15 @@ static void tcp_data_queue(struct sock *sk, struct sk_buff *skb)
if (eaten <= 0) {
queue_and_out:
- if (eaten < 0 &&
- tcp_try_rmem_schedule(sk, skb, skb->truesize))
- goto drop;
-
+ if (eaten < 0) {
+ if (skb_queue_len(&sk->sk_receive_queue) == 0)
+ sk_forced_mem_schedule(sk, skb->truesize);
+ else if (tcp_try_rmem_schedule(sk, skb, skb->truesize))
+ goto drop;
+ }
eaten = tcp_queue_rcv(sk, skb, 0, &fragstolen);
}
- tp->rcv_nxt = TCP_SKB_CB(skb)->end_seq;
+ tcp_rcv_nxt_update(tp, TCP_SKB_CB(skb)->end_seq);
if (skb->len)
tcp_event_data_recv(sk, skb);
if (TCP_SKB_CB(skb)->tcp_flags & TCPHDR_FIN)
@@ -4765,7 +4790,7 @@ static int tcp_prune_queue(struct sock *sk)
if (atomic_read(&sk->sk_rmem_alloc) >= sk->sk_rcvbuf)
tcp_clamp_window(sk);
- else if (sk_under_memory_pressure(sk))
+ else if (tcp_under_memory_pressure(sk))
tp->rcv_ssthresh = min(tp->rcv_ssthresh, 4U * tp->advmss);
tcp_collapse_ofo_queue(sk);
@@ -4809,7 +4834,7 @@ static bool tcp_should_expand_sndbuf(const struct sock *sk)
return false;
/* If we are under global TCP memory pressure, do not expand. */
- if (sk_under_memory_pressure(sk))
+ if (tcp_under_memory_pressure(sk))
return false;
/* If we are under soft global TCP memory pressure, do not expand. */
@@ -5245,7 +5270,7 @@ void tcp_rcv_established(struct sock *sk, struct sk_buff *skb,
tcp_rcv_rtt_measure_ts(sk, skb);
__skb_pull(skb, tcp_header_len);
- tp->rcv_nxt = TCP_SKB_CB(skb)->end_seq;
+ tcp_rcv_nxt_update(tp, TCP_SKB_CB(skb)->end_seq);
NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPHPHITSTOUSER);
eaten = 1;
}
@@ -6044,6 +6069,23 @@ static bool tcp_syn_flood_action(struct sock *sk,
return want_cookie;
}
+static void tcp_reqsk_record_syn(const struct sock *sk,
+ struct request_sock *req,
+ const struct sk_buff *skb)
+{
+ if (tcp_sk(sk)->save_syn) {
+ u32 len = skb_network_header_len(skb) + tcp_hdrlen(skb);
+ u32 *copy;
+
+ copy = kmalloc(len + sizeof(u32), GFP_ATOMIC);
+ if (copy) {
+ copy[0] = len;
+ memcpy(&copy[1], skb_network_header(skb), len);
+ req->saved_syn = copy;
+ }
+ }
+}
+
int tcp_conn_request(struct request_sock_ops *rsk_ops,
const struct tcp_request_sock_ops *af_ops,
struct sock *sk, struct sk_buff *skb)
@@ -6176,6 +6218,7 @@ int tcp_conn_request(struct request_sock_ops *rsk_ops,
tcp_rsk(req)->tfo_listener = false;
af_ops->queue_hash_add(sk, req, TCP_TIMEOUT_INIT);
}
+ tcp_reqsk_record_syn(sk, req, skb);
return 0;
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index fc1c658ec6c1..feb875769b8d 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -1626,6 +1626,7 @@ process:
skb->dev = NULL;
bh_lock_sock_nested(sk);
+ tcp_sk(sk)->segs_in += max_t(u16, 1, skb_shinfo(skb)->gso_segs);
ret = 0;
if (!sock_owned_by_user(sk)) {
if (!tcp_prequeue(sk, skb))
@@ -1802,6 +1803,7 @@ void tcp_v4_destroy_sock(struct sock *sk)
/* If socket is aborted during connect operation */
tcp_free_fastopen_req(tp);
+ tcp_saved_syn_free(tp);
sk_sockets_allocated_dec(sk);
sock_release_memcg(sk);
@@ -2410,12 +2412,15 @@ static int __net_init tcp_sk_init(struct net *net)
goto fail;
*per_cpu_ptr(net->ipv4.tcp_sk, cpu) = sk;
}
+
net->ipv4.sysctl_tcp_ecn = 2;
+ net->ipv4.sysctl_tcp_ecn_fallback = 1;
+
net->ipv4.sysctl_tcp_base_mss = TCP_BASE_MSS;
net->ipv4.sysctl_tcp_probe_threshold = TCP_PROBE_THRESHOLD;
net->ipv4.sysctl_tcp_probe_interval = TCP_PROBE_INTERVAL;
- return 0;
+ return 0;
fail:
tcp_sk_exit(net);
diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c
index e5d7649136fc..df7fe3c31162 100644
--- a/net/ipv4/tcp_minisocks.c
+++ b/net/ipv4/tcp_minisocks.c
@@ -300,7 +300,7 @@ void tcp_time_wait(struct sock *sk, int state, int timeo)
tw->tw_v6_daddr = sk->sk_v6_daddr;
tw->tw_v6_rcv_saddr = sk->sk_v6_rcv_saddr;
tw->tw_tclass = np->tclass;
- tw->tw_flowlabel = np->flow_label >> 12;
+ tw->tw_flowlabel = be32_to_cpu(np->flow_label & IPV6_FLOWLABEL_MASK);
tw->tw_ipv6only = sk->sk_ipv6only;
}
#endif
@@ -448,6 +448,7 @@ struct sock *tcp_create_openreq_child(struct sock *sk, struct request_sock *req,
newtp->rcv_wup = newtp->copied_seq =
newtp->rcv_nxt = treq->rcv_isn + 1;
+ newtp->segs_in = 0;
newtp->snd_sml = newtp->snd_una =
newtp->snd_nxt = newtp->snd_up = treq->snt_isn + 1;
@@ -536,6 +537,9 @@ struct sock *tcp_create_openreq_child(struct sock *sk, struct request_sock *req,
newtp->fastopen_rsk = NULL;
newtp->syn_data_acked = 0;
+ newtp->saved_syn = req->saved_syn;
+ req->saved_syn = NULL;
+
TCP_INC_STATS_BH(sock_net(sk), TCP_MIB_PASSIVEOPENS);
}
return newsk;
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index a369e8a70b2c..534e5fdb04c1 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -350,6 +350,15 @@ static void tcp_ecn_send_syn(struct sock *sk, struct sk_buff *skb)
}
}
+static void tcp_ecn_clear_syn(struct sock *sk, struct sk_buff *skb)
+{
+ if (sock_net(sk)->ipv4.sysctl_tcp_ecn_fallback)
+ /* tp->ecn_flags are cleared at a later point in time when
+ * SYN ACK is ultimatively being received.
+ */
+ TCP_SKB_CB(skb)->tcp_flags &= ~(TCPHDR_ECE | TCPHDR_CWR);
+}
+
static void
tcp_ecn_make_synack(const struct request_sock *req, struct tcphdr *th,
struct sock *sk)
@@ -1018,6 +1027,7 @@ static int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, int clone_it,
TCP_ADD_STATS(sock_net(sk), TCP_MIB_OUTSEGS,
tcp_skb_pcount(skb));
+ tp->segs_out += tcp_skb_pcount(skb);
/* OK, its time to fill skb_shinfo(skb)->gso_segs */
skb_shinfo(skb)->gso_segs = tcp_skb_pcount(skb);
@@ -1163,7 +1173,7 @@ int tcp_fragment(struct sock *sk, struct sk_buff *skb, u32 len,
return -ENOMEM;
/* Get a new skb... force flag on. */
- buff = sk_stream_alloc_skb(sk, nsize, gfp);
+ buff = sk_stream_alloc_skb(sk, nsize, gfp, true);
if (!buff)
return -ENOMEM; /* We'll just try again later. */
@@ -1722,7 +1732,7 @@ static int tso_fragment(struct sock *sk, struct sk_buff *skb, unsigned int len,
if (skb->len != skb->data_len)
return tcp_fragment(sk, skb, len, mss_now, gfp);
- buff = sk_stream_alloc_skb(sk, 0, gfp);
+ buff = sk_stream_alloc_skb(sk, 0, gfp, true);
if (unlikely(!buff))
return -ENOMEM;
@@ -1941,7 +1951,7 @@ static int tcp_mtu_probe(struct sock *sk)
}
/* We're allowed to probe. Build it now. */
- nskb = sk_stream_alloc_skb(sk, probe_size, GFP_ATOMIC);
+ nskb = sk_stream_alloc_skb(sk, probe_size, GFP_ATOMIC, false);
if (!nskb)
return -1;
sk->sk_wmem_queued += nskb->truesize;
@@ -2392,7 +2402,7 @@ u32 __tcp_select_window(struct sock *sk)
if (free_space < (full_space >> 1)) {
icsk->icsk_ack.quick = 0;
- if (sk_under_memory_pressure(sk))
+ if (tcp_under_memory_pressure(sk))
tp->rcv_ssthresh = min(tp->rcv_ssthresh,
4U * tp->advmss);
@@ -2615,6 +2625,10 @@ int __tcp_retransmit_skb(struct sock *sk, struct sk_buff *skb)
}
}
+ /* RFC3168, section 6.1.1.1. ECN fallback */
+ if ((TCP_SKB_CB(skb)->tcp_flags & TCPHDR_SYN_ECN) == TCPHDR_SYN_ECN)
+ tcp_ecn_clear_syn(sk, skb);
+
tcp_retrans_try_collapse(sk, skb, cur_mss);
/* Make a copy, if the first transmission SKB clone we made
@@ -2816,8 +2830,10 @@ begin_fwd:
* connection tear down and (memory) recovery.
* Otherwise tcp_send_fin() could be tempted to either delay FIN
* or even be forced to close flow without any FIN.
+ * In general, we want to allow one skb per socket to avoid hangs
+ * with edge trigger epoll()
*/
-static void sk_forced_wmem_schedule(struct sock *sk, int size)
+void sk_forced_mem_schedule(struct sock *sk, int size)
{
int amt, status;
@@ -2841,7 +2857,7 @@ void tcp_send_fin(struct sock *sk)
* Note: in the latter case, FIN packet will be sent after a timeout,
* as TCP stack thinks it has already been transmitted.
*/
- if (tskb && (tcp_send_head(sk) || sk_under_memory_pressure(sk))) {
+ if (tskb && (tcp_send_head(sk) || tcp_under_memory_pressure(sk))) {
coalesce:
TCP_SKB_CB(tskb)->tcp_flags |= TCPHDR_FIN;
TCP_SKB_CB(tskb)->end_seq++;
@@ -2864,7 +2880,7 @@ coalesce:
return;
}
skb_reserve(skb, MAX_TCP_HEADER);
- sk_forced_wmem_schedule(sk, skb->truesize);
+ sk_forced_mem_schedule(sk, skb->truesize);
/* FIN eats a sequence byte, write_seq advanced by tcp_queue_skb(). */
tcp_init_nondata_skb(skb, tp->write_seq,
TCPHDR_ACK | TCPHDR_FIN);
@@ -3175,7 +3191,7 @@ static int tcp_send_syn_data(struct sock *sk, struct sk_buff *syn)
/* limit to order-0 allocations */
space = min_t(size_t, space, SKB_MAX_HEAD(MAX_TCP_HEADER));
- syn_data = sk_stream_alloc_skb(sk, space, sk->sk_allocation);
+ syn_data = sk_stream_alloc_skb(sk, space, sk->sk_allocation, false);
if (!syn_data)
goto fallback;
syn_data->ip_summed = CHECKSUM_PARTIAL;
@@ -3241,7 +3257,7 @@ int tcp_connect(struct sock *sk)
return 0;
}
- buff = sk_stream_alloc_skb(sk, 0, sk->sk_allocation);
+ buff = sk_stream_alloc_skb(sk, 0, sk->sk_allocation, true);
if (unlikely(!buff))
return -ENOBUFS;
@@ -3382,7 +3398,7 @@ EXPORT_SYMBOL_GPL(tcp_send_ack);
* one is with SEG.SEQ=SND.UNA to deliver urgent pointer, another is
* out-of-date with SND.UNA-1 to probe window.
*/
-static int tcp_xmit_probe_skb(struct sock *sk, int urgent)
+static int tcp_xmit_probe_skb(struct sock *sk, int urgent, int mib)
{
struct tcp_sock *tp = tcp_sk(sk);
struct sk_buff *skb;
@@ -3400,6 +3416,7 @@ static int tcp_xmit_probe_skb(struct sock *sk, int urgent)
*/
tcp_init_nondata_skb(skb, tp->snd_una - !urgent, TCPHDR_ACK);
skb_mstamp_get(&skb->skb_mstamp);
+ NET_INC_STATS_BH(sock_net(sk), mib);
return tcp_transmit_skb(sk, skb, 0, GFP_ATOMIC);
}
@@ -3407,12 +3424,12 @@ void tcp_send_window_probe(struct sock *sk)
{
if (sk->sk_state == TCP_ESTABLISHED) {
tcp_sk(sk)->snd_wl1 = tcp_sk(sk)->rcv_nxt - 1;
- tcp_xmit_probe_skb(sk, 0);
+ tcp_xmit_probe_skb(sk, 0, LINUX_MIB_TCPWINPROBE);
}
}
/* Initiate keepalive or window probe from timer. */
-int tcp_write_wakeup(struct sock *sk)
+int tcp_write_wakeup(struct sock *sk, int mib)
{
struct tcp_sock *tp = tcp_sk(sk);
struct sk_buff *skb;
@@ -3449,8 +3466,8 @@ int tcp_write_wakeup(struct sock *sk)
return err;
} else {
if (between(tp->snd_up, tp->snd_una + 1, tp->snd_una + 0xFFFF))
- tcp_xmit_probe_skb(sk, 1);
- return tcp_xmit_probe_skb(sk, 0);
+ tcp_xmit_probe_skb(sk, 1, mib);
+ return tcp_xmit_probe_skb(sk, 0, mib);
}
}
@@ -3464,7 +3481,7 @@ void tcp_send_probe0(struct sock *sk)
unsigned long probe_max;
int err;
- err = tcp_write_wakeup(sk);
+ err = tcp_write_wakeup(sk, LINUX_MIB_TCPWINPROBE);
if (tp->packets_out || !tcp_send_head(sk)) {
/* Cancel probe timer, if it is not required. */
@@ -3490,7 +3507,7 @@ void tcp_send_probe0(struct sock *sk)
probe_max = TCP_RESOURCE_PROBE_INTERVAL;
}
inet_csk_reset_xmit_timer(sk, ICSK_TIME_PROBE0,
- inet_csk_rto_backoff(icsk, probe_max),
+ tcp_probe0_when(sk, probe_max),
TCP_RTO_MAX);
}
diff --git a/net/ipv4/tcp_timer.c b/net/ipv4/tcp_timer.c
index 8c65dc147d8b..5b752f58a900 100644
--- a/net/ipv4/tcp_timer.c
+++ b/net/ipv4/tcp_timer.c
@@ -247,7 +247,7 @@ void tcp_delack_timer_handler(struct sock *sk)
}
out:
- if (sk_under_memory_pressure(sk))
+ if (tcp_under_memory_pressure(sk))
sk_mem_reclaim(sk);
}
@@ -616,7 +616,7 @@ static void tcp_keepalive_timer (unsigned long data)
tcp_write_err(sk);
goto out;
}
- if (tcp_write_wakeup(sk) <= 0) {
+ if (tcp_write_wakeup(sk, LINUX_MIB_TCPKEEPALIVE) <= 0) {
icsk->icsk_probes_out++;
elapsed = keepalive_intvl_when(tp);
} else {
diff --git a/net/ipv4/tcp_vegas.c b/net/ipv4/tcp_vegas.c
index c71a1b8f7bde..a6cea1d5e20d 100644
--- a/net/ipv4/tcp_vegas.c
+++ b/net/ipv4/tcp_vegas.c
@@ -286,18 +286,19 @@ static void tcp_vegas_cong_avoid(struct sock *sk, u32 ack, u32 acked)
}
/* Extract info for Tcp socket info provided via netlink. */
-int tcp_vegas_get_info(struct sock *sk, u32 ext, struct sk_buff *skb)
+size_t tcp_vegas_get_info(struct sock *sk, u32 ext, int *attr,
+ union tcp_cc_info *info)
{
const struct vegas *ca = inet_csk_ca(sk);
+
if (ext & (1 << (INET_DIAG_VEGASINFO - 1))) {
- struct tcpvegas_info info = {
- .tcpv_enabled = ca->doing_vegas_now,
- .tcpv_rttcnt = ca->cntRTT,
- .tcpv_rtt = ca->baseRTT,
- .tcpv_minrtt = ca->minRTT,
- };
-
- return nla_put(skb, INET_DIAG_VEGASINFO, sizeof(info), &info);
+ info->vegas.tcpv_enabled = ca->doing_vegas_now,
+ info->vegas.tcpv_rttcnt = ca->cntRTT,
+ info->vegas.tcpv_rtt = ca->baseRTT,
+ info->vegas.tcpv_minrtt = ca->minRTT,
+
+ *attr = INET_DIAG_VEGASINFO;
+ return sizeof(struct tcpvegas_info);
}
return 0;
}
diff --git a/net/ipv4/tcp_vegas.h b/net/ipv4/tcp_vegas.h
index e8a6b33cc61d..ef9da5306c68 100644
--- a/net/ipv4/tcp_vegas.h
+++ b/net/ipv4/tcp_vegas.h
@@ -19,6 +19,7 @@ void tcp_vegas_init(struct sock *sk);
void tcp_vegas_state(struct sock *sk, u8 ca_state);
void tcp_vegas_pkts_acked(struct sock *sk, u32 cnt, s32 rtt_us);
void tcp_vegas_cwnd_event(struct sock *sk, enum tcp_ca_event event);
-int tcp_vegas_get_info(struct sock *sk, u32 ext, struct sk_buff *skb);
+size_t tcp_vegas_get_info(struct sock *sk, u32 ext, int *attr,
+ union tcp_cc_info *info);
#endif /* __TCP_VEGAS_H */
diff --git a/net/ipv4/tcp_westwood.c b/net/ipv4/tcp_westwood.c
index b3c57cceb990..c10732e39837 100644
--- a/net/ipv4/tcp_westwood.c
+++ b/net/ipv4/tcp_westwood.c
@@ -256,18 +256,19 @@ static void tcp_westwood_event(struct sock *sk, enum tcp_ca_event event)
}
/* Extract info for Tcp socket info provided via netlink. */
-static int tcp_westwood_info(struct sock *sk, u32 ext, struct sk_buff *skb)
+static size_t tcp_westwood_info(struct sock *sk, u32 ext, int *attr,
+ union tcp_cc_info *info)
{
const struct westwood *ca = inet_csk_ca(sk);
if (ext & (1 << (INET_DIAG_VEGASINFO - 1))) {
- struct tcpvegas_info info = {
- .tcpv_enabled = 1,
- .tcpv_rtt = jiffies_to_usecs(ca->rtt),
- .tcpv_minrtt = jiffies_to_usecs(ca->rtt_min),
- };
+ info->vegas.tcpv_enabled = 1;
+ info->vegas.tcpv_rttcnt = 0;
+ info->vegas.tcpv_rtt = jiffies_to_usecs(ca->rtt),
+ info->vegas.tcpv_minrtt = jiffies_to_usecs(ca->rtt_min),
- return nla_put(skb, INET_DIAG_VEGASINFO, sizeof(info), &info);
+ *attr = INET_DIAG_VEGASINFO;
+ return sizeof(struct tcpvegas_info);
}
return 0;
}
diff --git a/net/ipv4/udp_tunnel.c b/net/ipv4/udp_tunnel.c
index 6bb98cc193c9..933ea903f7b8 100644
--- a/net/ipv4/udp_tunnel.c
+++ b/net/ipv4/udp_tunnel.c
@@ -15,12 +15,10 @@ int udp_sock_create4(struct net *net, struct udp_port_cfg *cfg,
struct socket *sock = NULL;
struct sockaddr_in udp_addr;
- err = sock_create_kern(AF_INET, SOCK_DGRAM, 0, &sock);
+ err = sock_create_kern(net, AF_INET, SOCK_DGRAM, 0, &sock);
if (err < 0)
goto error;
- sk_change_net(sock->sk, net);
-
udp_addr.sin_family = AF_INET;
udp_addr.sin_addr = cfg->local_ip;
udp_addr.sin_port = cfg->local_udp_port;
@@ -47,7 +45,7 @@ int udp_sock_create4(struct net *net, struct udp_port_cfg *cfg,
error:
if (sock) {
kernel_sock_shutdown(sock, SHUT_RDWR);
- sk_release_kernel(sock->sk);
+ sock_release(sock);
}
*sockp = NULL;
return err;
@@ -101,7 +99,7 @@ void udp_tunnel_sock_release(struct socket *sock)
{
rcu_assign_sk_user_data(sock->sk, NULL);
kernel_sock_shutdown(sock, SHUT_RDWR);
- sk_release_kernel(sock->sk);
+ sock_release(sock);
}
EXPORT_SYMBOL_GPL(udp_tunnel_sock_release);
diff --git a/net/ipv6/Makefile b/net/ipv6/Makefile
index 2e8c06108ab9..0f3f1999719a 100644
--- a/net/ipv6/Makefile
+++ b/net/ipv6/Makefile
@@ -48,4 +48,5 @@ obj-$(subst m,y,$(CONFIG_IPV6)) += inet6_hashtables.o
ifneq ($(CONFIG_IPV6),)
obj-$(CONFIG_NET_UDP_TUNNEL) += ip6_udp_tunnel.o
+obj-y += mcast_snoop.o
endif
diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c
index 37b70e82bff8..21c2c818df3b 100644
--- a/net/ipv6/addrconf.c
+++ b/net/ipv6/addrconf.c
@@ -2121,6 +2121,8 @@ static struct rt6_info *addrconf_get_prefix_route(const struct in6_addr *pfx,
fn = fib6_locate(&table->tb6_root, pfx, plen, NULL, 0);
if (!fn)
goto out;
+
+ noflags |= RTF_CACHE;
for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
if (rt->dst.dev->ifindex != dev->ifindex)
continue;
diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c
index eef63b394c5a..f3866c0b6cfe 100644
--- a/net/ipv6/af_inet6.c
+++ b/net/ipv6/af_inet6.c
@@ -167,7 +167,7 @@ lookup_protocol:
WARN_ON(!answer_prot->slab);
err = -ENOBUFS;
- sk = sk_alloc(net, PF_INET6, GFP_KERNEL, answer_prot);
+ sk = sk_alloc(net, PF_INET6, GFP_KERNEL, answer_prot, kern);
if (!sk)
goto out;
@@ -768,6 +768,7 @@ static int __net_init inet6_net_init(struct net *net)
net->ipv6.sysctl.auto_flowlabels = 0;
net->ipv6.sysctl.idgen_retries = 3;
net->ipv6.sysctl.idgen_delay = 1 * HZ;
+ net->ipv6.sysctl.flowlabel_state_ranges = 1;
atomic_set(&net->ipv6.fib6_sernum, 1);
err = ipv6_init_mibs(net);
diff --git a/net/ipv6/ip6_fib.c b/net/ipv6/ip6_fib.c
index 96dbffff5a24..bde57b113009 100644
--- a/net/ipv6/ip6_fib.c
+++ b/net/ipv6/ip6_fib.c
@@ -693,6 +693,7 @@ static int fib6_add_rt2node(struct fib6_node *fn, struct rt6_info *rt,
{
struct rt6_info *iter = NULL;
struct rt6_info **ins;
+ struct rt6_info **fallback_ins = NULL;
int replace = (info->nlh &&
(info->nlh->nlmsg_flags & NLM_F_REPLACE));
int add = (!info->nlh ||
@@ -716,8 +717,13 @@ static int fib6_add_rt2node(struct fib6_node *fn, struct rt6_info *rt,
(info->nlh->nlmsg_flags & NLM_F_EXCL))
return -EEXIST;
if (replace) {
- found++;
- break;
+ if (rt_can_ecmp == rt6_qualify_for_ecmp(iter)) {
+ found++;
+ break;
+ }
+ if (rt_can_ecmp)
+ fallback_ins = fallback_ins ?: ins;
+ goto next_iter;
}
if (iter->dst.dev == rt->dst.dev &&
@@ -753,9 +759,17 @@ static int fib6_add_rt2node(struct fib6_node *fn, struct rt6_info *rt,
if (iter->rt6i_metric > rt->rt6i_metric)
break;
+next_iter:
ins = &iter->dst.rt6_next;
}
+ if (fallback_ins && !found) {
+ /* No ECMP-able route found, replace first non-ECMP one */
+ ins = fallback_ins;
+ iter = *ins;
+ found++;
+ }
+
/* Reset round-robin state, if necessary */
if (ins == &fn->leaf)
fn->rr_ptr = NULL;
@@ -815,6 +829,8 @@ add:
}
} else {
+ int nsiblings;
+
if (!found) {
if (add)
goto add;
@@ -835,8 +851,27 @@ add:
info->nl_net->ipv6.rt6_stats->fib_route_nodes++;
fn->fn_flags |= RTN_RTINFO;
}
+ nsiblings = iter->rt6i_nsiblings;
fib6_purge_rt(iter, fn, info->nl_net);
rt6_release(iter);
+
+ if (nsiblings) {
+ /* Replacing an ECMP route, remove all siblings */
+ ins = &rt->dst.rt6_next;
+ iter = *ins;
+ while (iter) {
+ if (rt6_qualify_for_ecmp(iter)) {
+ *ins = iter->dst.rt6_next;
+ fib6_purge_rt(iter, fn, info->nl_net);
+ rt6_release(iter);
+ nsiblings--;
+ } else {
+ ins = &iter->dst.rt6_next;
+ }
+ iter = *ins;
+ }
+ WARN_ON(nsiblings != 0);
+ }
}
return 0;
diff --git a/net/ipv6/ip6_flowlabel.c b/net/ipv6/ip6_flowlabel.c
index d491125011c4..1f9ebe3cbb4a 100644
--- a/net/ipv6/ip6_flowlabel.c
+++ b/net/ipv6/ip6_flowlabel.c
@@ -595,6 +595,10 @@ int ipv6_flowlabel_opt(struct sock *sk, char __user *optval, int optlen)
if (freq.flr_label & ~IPV6_FLOWLABEL_MASK)
return -EINVAL;
+ if (net->ipv6.sysctl.flowlabel_state_ranges &&
+ (freq.flr_label & IPV6_FLOWLABEL_STATELESS_FLAG))
+ return -ERANGE;
+
fl = fl_create(net, sk, &freq, optval, optlen, &err);
if (!fl)
return err;
diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c
index 7fde1f265c90..bc09cb97b840 100644
--- a/net/ipv6/ip6_output.c
+++ b/net/ipv6/ip6_output.c
@@ -886,22 +886,45 @@ static int ip6_dst_lookup_tail(struct sock *sk,
#endif
int err;
- if (!*dst)
- *dst = ip6_route_output(net, sk, fl6);
-
- err = (*dst)->error;
- if (err)
- goto out_err_release;
+ /* The correct way to handle this would be to do
+ * ip6_route_get_saddr, and then ip6_route_output; however,
+ * the route-specific preferred source forces the
+ * ip6_route_output call _before_ ip6_route_get_saddr.
+ *
+ * In source specific routing (no src=any default route),
+ * ip6_route_output will fail given src=any saddr, though, so
+ * that's why we try it again later.
+ */
+ if (ipv6_addr_any(&fl6->saddr) && (!*dst || !(*dst)->error)) {
+ struct rt6_info *rt;
+ bool had_dst = *dst != NULL;
- if (ipv6_addr_any(&fl6->saddr)) {
- struct rt6_info *rt = (struct rt6_info *) *dst;
+ if (!had_dst)
+ *dst = ip6_route_output(net, sk, fl6);
+ rt = (*dst)->error ? NULL : (struct rt6_info *)*dst;
err = ip6_route_get_saddr(net, rt, &fl6->daddr,
sk ? inet6_sk(sk)->srcprefs : 0,
&fl6->saddr);
if (err)
goto out_err_release;
+
+ /* If we had an erroneous initial result, pretend it
+ * never existed and let the SA-enabled version take
+ * over.
+ */
+ if (!had_dst && (*dst)->error) {
+ dst_release(*dst);
+ *dst = NULL;
+ }
}
+ if (!*dst)
+ *dst = ip6_route_output(net, sk, fl6);
+
+ err = (*dst)->error;
+ if (err)
+ goto out_err_release;
+
#ifdef CONFIG_IPV6_OPTIMISTIC_DAD
/*
* Here if the dst entry we've looked up
@@ -1277,8 +1300,10 @@ emsgsize:
/* If this is the first and only packet and device
* supports checksum offloading, let's use it.
+ * Use transhdrlen, same as IPv4, because partial
+ * sums only work when transhdrlen is set.
*/
- if (!skb && sk->sk_protocol == IPPROTO_UDP &&
+ if (transhdrlen && sk->sk_protocol == IPPROTO_UDP &&
length + fragheaderlen < mtu &&
rt->dst.dev->features & NETIF_F_V6_CSUM &&
!exthdrlen)
diff --git a/net/ipv6/ip6_udp_tunnel.c b/net/ipv6/ip6_udp_tunnel.c
index bba8903e871f..e1a1136bda7c 100644
--- a/net/ipv6/ip6_udp_tunnel.c
+++ b/net/ipv6/ip6_udp_tunnel.c
@@ -19,12 +19,10 @@ int udp_sock_create6(struct net *net, struct udp_port_cfg *cfg,
int err;
struct socket *sock = NULL;
- err = sock_create_kern(AF_INET6, SOCK_DGRAM, 0, &sock);
+ err = sock_create_kern(net, AF_INET6, SOCK_DGRAM, 0, &sock);
if (err < 0)
goto error;
- sk_change_net(sock->sk, net);
-
udp6_addr.sin6_family = AF_INET6;
memcpy(&udp6_addr.sin6_addr, &cfg->local_ip6,
sizeof(udp6_addr.sin6_addr));
@@ -55,7 +53,7 @@ int udp_sock_create6(struct net *net, struct udp_port_cfg *cfg,
error:
if (sock) {
kernel_sock_shutdown(sock, SHUT_RDWR);
- sk_release_kernel(sock->sk);
+ sock_release(sock);
}
*sockp = NULL;
return err;
diff --git a/net/ipv6/mcast_snoop.c b/net/ipv6/mcast_snoop.c
new file mode 100644
index 000000000000..df8afe5ab31e
--- /dev/null
+++ b/net/ipv6/mcast_snoop.c
@@ -0,0 +1,213 @@
+/* Copyright (C) 2010: YOSHIFUJI Hideaki <yoshfuji@linux-ipv6.org>
+ * Copyright (C) 2015: Linus L├╝ssing <linus.luessing@c0d3.blue>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of version 2 of the GNU General Public
+ * License as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, see <http://www.gnu.org/licenses/>.
+ *
+ *
+ * Based on the MLD support added to br_multicast.c by YOSHIFUJI Hideaki.
+ */
+
+#include <linux/skbuff.h>
+#include <net/ipv6.h>
+#include <net/mld.h>
+#include <net/addrconf.h>
+#include <net/ip6_checksum.h>
+
+static int ipv6_mc_check_ip6hdr(struct sk_buff *skb)
+{
+ const struct ipv6hdr *ip6h;
+ unsigned int len;
+ unsigned int offset = skb_network_offset(skb) + sizeof(*ip6h);
+
+ if (!pskb_may_pull(skb, offset))
+ return -EINVAL;
+
+ ip6h = ipv6_hdr(skb);
+
+ if (ip6h->version != 6)
+ return -EINVAL;
+
+ len = offset + ntohs(ip6h->payload_len);
+ if (skb->len < len || len <= offset)
+ return -EINVAL;
+
+ return 0;
+}
+
+static int ipv6_mc_check_exthdrs(struct sk_buff *skb)
+{
+ const struct ipv6hdr *ip6h;
+ int offset;
+ u8 nexthdr;
+ __be16 frag_off;
+
+ ip6h = ipv6_hdr(skb);
+
+ if (ip6h->nexthdr != IPPROTO_HOPOPTS)
+ return -ENOMSG;
+
+ nexthdr = ip6h->nexthdr;
+ offset = skb_network_offset(skb) + sizeof(*ip6h);
+ offset = ipv6_skip_exthdr(skb, offset, &nexthdr, &frag_off);
+
+ if (offset < 0)
+ return -EINVAL;
+
+ if (nexthdr != IPPROTO_ICMPV6)
+ return -ENOMSG;
+
+ skb_set_transport_header(skb, offset);
+
+ return 0;
+}
+
+static int ipv6_mc_check_mld_reportv2(struct sk_buff *skb)
+{
+ unsigned int len = skb_transport_offset(skb);
+
+ len += sizeof(struct mld2_report);
+
+ return pskb_may_pull(skb, len) ? 0 : -EINVAL;
+}
+
+static int ipv6_mc_check_mld_query(struct sk_buff *skb)
+{
+ struct mld_msg *mld;
+ unsigned int len = skb_transport_offset(skb);
+
+ /* RFC2710+RFC3810 (MLDv1+MLDv2) require link-local source addresses */
+ if (!(ipv6_addr_type(&ipv6_hdr(skb)->saddr) & IPV6_ADDR_LINKLOCAL))
+ return -EINVAL;
+
+ len += sizeof(struct mld_msg);
+ if (skb->len < len)
+ return -EINVAL;
+
+ /* MLDv1? */
+ if (skb->len != len) {
+ /* or MLDv2? */
+ len += sizeof(struct mld2_query) - sizeof(struct mld_msg);
+ if (skb->len < len || !pskb_may_pull(skb, len))
+ return -EINVAL;
+ }
+
+ mld = (struct mld_msg *)skb_transport_header(skb);
+
+ /* RFC2710+RFC3810 (MLDv1+MLDv2) require the multicast link layer
+ * all-nodes destination address (ff02::1) for general queries
+ */
+ if (ipv6_addr_any(&mld->mld_mca) &&
+ !ipv6_addr_is_ll_all_nodes(&ipv6_hdr(skb)->daddr))
+ return -EINVAL;
+
+ return 0;
+}
+
+static int ipv6_mc_check_mld_msg(struct sk_buff *skb)
+{
+ struct mld_msg *mld = (struct mld_msg *)skb_transport_header(skb);
+
+ switch (mld->mld_type) {
+ case ICMPV6_MGM_REDUCTION:
+ case ICMPV6_MGM_REPORT:
+ /* fall through */
+ return 0;
+ case ICMPV6_MLD2_REPORT:
+ return ipv6_mc_check_mld_reportv2(skb);
+ case ICMPV6_MGM_QUERY:
+ return ipv6_mc_check_mld_query(skb);
+ default:
+ return -ENOMSG;
+ }
+}
+
+static inline __sum16 ipv6_mc_validate_checksum(struct sk_buff *skb)
+{
+ return skb_checksum_validate(skb, IPPROTO_ICMPV6, ip6_compute_pseudo);
+}
+
+static int __ipv6_mc_check_mld(struct sk_buff *skb,
+ struct sk_buff **skb_trimmed)
+
+{
+ struct sk_buff *skb_chk = NULL;
+ unsigned int transport_len;
+ unsigned int len = skb_transport_offset(skb) + sizeof(struct mld_msg);
+ int ret;
+
+ transport_len = ntohs(ipv6_hdr(skb)->payload_len);
+ transport_len -= skb_transport_offset(skb) - sizeof(struct ipv6hdr);
+
+ skb_get(skb);
+ skb_chk = skb_checksum_trimmed(skb, transport_len,
+ ipv6_mc_validate_checksum);
+ if (!skb_chk)
+ return -EINVAL;
+
+ if (!pskb_may_pull(skb_chk, len)) {
+ kfree_skb(skb_chk);
+ return -EINVAL;
+ }
+
+ ret = ipv6_mc_check_mld_msg(skb_chk);
+ if (ret) {
+ kfree_skb(skb_chk);
+ return ret;
+ }
+
+ if (skb_trimmed)
+ *skb_trimmed = skb_chk;
+ else
+ kfree_skb(skb_chk);
+
+ return 0;
+}
+
+/**
+ * ipv6_mc_check_mld - checks whether this is a sane MLD packet
+ * @skb: the skb to validate
+ * @skb_trimmed: to store an skb pointer trimmed to IPv6 packet tail (optional)
+ *
+ * Checks whether an IPv6 packet is a valid MLD packet. If so sets
+ * skb network and transport headers accordingly and returns zero.
+ *
+ * -EINVAL: A broken packet was detected, i.e. it violates some internet
+ * standard
+ * -ENOMSG: IP header validation succeeded but it is not an MLD packet.
+ * -ENOMEM: A memory allocation failure happened.
+ *
+ * Optionally, an skb pointer might be provided via skb_trimmed (or set it
+ * to NULL): After parsing an MLD packet successfully it will point to
+ * an skb which has its tail aligned to the IP packet end. This might
+ * either be the originally provided skb or a trimmed, cloned version if
+ * the skb frame had data beyond the IP packet. A cloned skb allows us
+ * to leave the original skb and its full frame unchanged (which might be
+ * desirable for layer 2 frame jugglers).
+ *
+ * The caller needs to release a reference count from any returned skb_trimmed.
+ */
+int ipv6_mc_check_mld(struct sk_buff *skb, struct sk_buff **skb_trimmed)
+{
+ int ret;
+
+ ret = ipv6_mc_check_ip6hdr(skb);
+ if (ret < 0)
+ return ret;
+
+ ret = ipv6_mc_check_exthdrs(skb);
+ if (ret < 0)
+ return ret;
+
+ return __ipv6_mc_check_mld(skb, skb_trimmed);
+}
+EXPORT_SYMBOL(ipv6_mc_check_mld);
diff --git a/net/ipv6/netfilter/ip6_tables.c b/net/ipv6/netfilter/ip6_tables.c
index 1a732a1d3c8e..62f5b0d0bc9b 100644
--- a/net/ipv6/netfilter/ip6_tables.c
+++ b/net/ipv6/netfilter/ip6_tables.c
@@ -1275,6 +1275,9 @@ do_replace(struct net *net, const void __user *user, unsigned int len)
/* overflow check */
if (tmp.num_counters >= INT_MAX / sizeof(struct xt_counters))
return -ENOMEM;
+ if (tmp.num_counters == 0)
+ return -EINVAL;
+
tmp.name[sizeof(tmp.name)-1] = 0;
newinfo = xt_alloc_table_info(tmp.size);
@@ -1822,6 +1825,9 @@ compat_do_replace(struct net *net, void __user *user, unsigned int len)
return -ENOMEM;
if (tmp.num_counters >= INT_MAX / sizeof(struct xt_counters))
return -ENOMEM;
+ if (tmp.num_counters == 0)
+ return -EINVAL;
+
tmp.name[sizeof(tmp.name)-1] = 0;
newinfo = xt_alloc_table_info(tmp.size);
diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index 5c48293ff062..0c889cb89cc3 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -92,6 +92,7 @@ static void ip6_rt_update_pmtu(struct dst_entry *dst, struct sock *sk,
struct sk_buff *skb, u32 mtu);
static void rt6_do_redirect(struct dst_entry *dst, struct sock *sk,
struct sk_buff *skb);
+static void rt6_dst_from_metrics_check(struct rt6_info *rt);
static int rt6_score_route(struct rt6_info *rt, int oif, int strict);
#ifdef CONFIG_IPV6_ROUTE_INFO
@@ -104,65 +105,14 @@ static struct rt6_info *rt6_get_route_info(struct net *net,
const struct in6_addr *gwaddr, int ifindex);
#endif
-static void rt6_bind_peer(struct rt6_info *rt, int create)
-{
- struct inet_peer_base *base;
- struct inet_peer *peer;
-
- base = inetpeer_base_ptr(rt->_rt6i_peer);
- if (!base)
- return;
-
- peer = inet_getpeer_v6(base, &rt->rt6i_dst.addr, create);
- if (peer) {
- if (!rt6_set_peer(rt, peer))
- inet_putpeer(peer);
- }
-}
-
-static struct inet_peer *__rt6_get_peer(struct rt6_info *rt, int create)
-{
- if (rt6_has_peer(rt))
- return rt6_peer_ptr(rt);
-
- rt6_bind_peer(rt, create);
- return (rt6_has_peer(rt) ? rt6_peer_ptr(rt) : NULL);
-}
-
-static struct inet_peer *rt6_get_peer_create(struct rt6_info *rt)
-{
- return __rt6_get_peer(rt, 1);
-}
-
static u32 *ipv6_cow_metrics(struct dst_entry *dst, unsigned long old)
{
- struct rt6_info *rt = (struct rt6_info *) dst;
- struct inet_peer *peer;
- u32 *p = NULL;
+ struct rt6_info *rt = (struct rt6_info *)dst;
- if (!(rt->dst.flags & DST_HOST))
+ if (rt->rt6i_flags & RTF_CACHE)
+ return NULL;
+ else
return dst_cow_metrics_generic(dst, old);
-
- peer = rt6_get_peer_create(rt);
- if (peer) {
- u32 *old_p = __DST_METRICS_PTR(old);
- unsigned long prev, new;
-
- p = peer->metrics;
- if (inet_metrics_new(peer) ||
- (old & DST_METRICS_FORCE_OVERWRITE))
- memcpy(p, old_p, sizeof(u32) * RTAX_MAX);
-
- new = (unsigned long) p;
- prev = cmpxchg(&dst->_metrics, old, new);
-
- if (prev != old) {
- p = __DST_METRICS_PTR(prev);
- if (prev & DST_METRICS_READ_ONLY)
- p = NULL;
- }
- }
- return p;
}
static inline const void *choose_neigh_daddr(struct rt6_info *rt,
@@ -311,7 +261,6 @@ static inline struct rt6_info *ip6_dst_alloc(struct net *net,
struct dst_entry *dst = &rt->dst;
memset(dst + 1, 0, sizeof(*rt) - sizeof(*dst));
- rt6_init_peer(rt, table ? &table->tb6_peers : net->ipv6.peers);
INIT_LIST_HEAD(&rt->rt6i_siblings);
}
return rt;
@@ -323,8 +272,7 @@ static void ip6_dst_destroy(struct dst_entry *dst)
struct inet6_dev *idev = rt->rt6i_idev;
struct dst_entry *from = dst->from;
- if (!(rt->dst.flags & DST_HOST))
- dst_destroy_metrics_generic(dst);
+ dst_destroy_metrics_generic(dst);
if (idev) {
rt->rt6i_idev = NULL;
@@ -333,11 +281,6 @@ static void ip6_dst_destroy(struct dst_entry *dst)
dst->from = NULL;
dst_release(from);
-
- if (rt6_has_peer(rt)) {
- struct inet_peer *peer = rt6_peer_ptr(rt);
- inet_putpeer(peer);
- }
}
static void ip6_dst_ifdown(struct dst_entry *dst, struct net_device *dev,
@@ -652,15 +595,33 @@ static struct rt6_info *find_rr_leaf(struct fib6_node *fn,
u32 metric, int oif, int strict,
bool *do_rr)
{
- struct rt6_info *rt, *match;
+ struct rt6_info *rt, *match, *cont;
int mpri = -1;
match = NULL;
- for (rt = rr_head; rt && rt->rt6i_metric == metric;
- rt = rt->dst.rt6_next)
+ cont = NULL;
+ for (rt = rr_head; rt; rt = rt->dst.rt6_next) {
+ if (rt->rt6i_metric != metric) {
+ cont = rt;
+ break;
+ }
+
+ match = find_match(rt, oif, strict, &mpri, match, do_rr);
+ }
+
+ for (rt = fn->leaf; rt && rt != rr_head; rt = rt->dst.rt6_next) {
+ if (rt->rt6i_metric != metric) {
+ cont = rt;
+ break;
+ }
+
match = find_match(rt, oif, strict, &mpri, match, do_rr);
- for (rt = fn->leaf; rt && rt != rr_head && rt->rt6i_metric == metric;
- rt = rt->dst.rt6_next)
+ }
+
+ if (match || !cont)
+ return match;
+
+ for (rt = cont; rt; rt = rt->dst.rt6_next)
match = find_match(rt, oif, strict, &mpri, match, do_rr);
return match;
@@ -959,7 +920,7 @@ redo_rt6_select:
if (!(rt->rt6i_flags & (RTF_NONEXTHOP | RTF_GATEWAY)))
nrt = rt6_alloc_cow(rt, &fl6->daddr, &fl6->saddr);
- else if (!(rt->dst.flags & DST_HOST))
+ else if (!(rt->dst.flags & DST_HOST) || !(rt->rt6i_flags & RTF_LOCAL))
nrt = rt6_alloc_clone(rt, &fl6->daddr);
else
goto out2;
@@ -985,6 +946,7 @@ redo_rt6_select:
goto redo_fib6_lookup_lock;
out2:
+ rt6_dst_from_metrics_check(rt);
rt->dst.lastuse = jiffies;
rt->dst.__use++;
@@ -1059,7 +1021,6 @@ struct dst_entry *ip6_blackhole_route(struct net *net, struct dst_entry *dst_ori
new = &rt->dst;
memset(new + 1, 0, sizeof(*rt) - sizeof(*new));
- rt6_init_peer(rt, net->ipv6.peers);
new->__use = 1;
new->input = dst_discard;
@@ -1093,6 +1054,13 @@ struct dst_entry *ip6_blackhole_route(struct net *net, struct dst_entry *dst_ori
* Destination cache support functions
*/
+static void rt6_dst_from_metrics_check(struct rt6_info *rt)
+{
+ if (rt->dst.from &&
+ dst_metrics_ptr(&rt->dst) != dst_metrics_ptr(rt->dst.from))
+ dst_init_metrics(&rt->dst, dst_metrics_ptr(rt->dst.from), true);
+}
+
static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie)
{
struct rt6_info *rt;
@@ -1109,6 +1077,8 @@ static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie)
if (rt6_check_expired(rt))
return NULL;
+ rt6_dst_from_metrics_check(rt);
+
return dst;
}
@@ -1154,14 +1124,14 @@ static void ip6_rt_update_pmtu(struct dst_entry *dst, struct sock *sk,
struct rt6_info *rt6 = (struct rt6_info *)dst;
dst_confirm(dst);
- if (mtu < dst_mtu(dst) && rt6->rt6i_dst.plen == 128) {
+ if (mtu < dst_mtu(dst) && (rt6->rt6i_flags & RTF_CACHE)) {
struct net *net = dev_net(dst->dev);
rt6->rt6i_flags |= RTF_MODIFIED;
if (mtu < IPV6_MIN_MTU)
mtu = IPV6_MIN_MTU;
- dst_metric_set(dst, RTAX_MTU, mtu);
+ rt6->rt6i_pmtu = mtu;
rt6_update_expires(rt6, net->ipv6.sysctl.ip6_rt_mtu_expires);
}
}
@@ -1341,12 +1311,17 @@ static unsigned int ip6_default_advmss(const struct dst_entry *dst)
static unsigned int ip6_mtu(const struct dst_entry *dst)
{
+ const struct rt6_info *rt = (const struct rt6_info *)dst;
+ unsigned int mtu = rt->rt6i_pmtu;
struct inet6_dev *idev;
- unsigned int mtu = dst_metric_raw(dst, RTAX_MTU);
if (mtu)
goto out;
+ mtu = dst_metric_raw(dst, RTAX_MTU);
+ if (mtu)
+ goto out;
+
mtu = IPV6_MIN_MTU;
rcu_read_lock();
@@ -1590,10 +1565,8 @@ int ip6_route_add(struct fib6_config *cfg)
ipv6_addr_prefix(&rt->rt6i_dst.addr, &cfg->fc_dst, cfg->fc_dst_len);
rt->rt6i_dst.plen = cfg->fc_dst_len;
- if (rt->rt6i_dst.plen == 128) {
+ if (rt->rt6i_dst.plen == 128)
rt->dst.flags |= DST_HOST;
- dst_metrics_set_force_overwrite(&rt->dst);
- }
#ifdef CONFIG_IPV6_SUBTREES
ipv6_addr_prefix(&rt->rt6i_src.addr, &cfg->fc_src, cfg->fc_src_len);
@@ -1651,6 +1624,16 @@ int ip6_route_add(struct fib6_config *cfg)
int gwa_type;
gw_addr = &cfg->fc_gateway;
+
+ /* if gw_addr is local we will fail to detect this in case
+ * address is still TENTATIVE (DAD in progress). rt6_lookup()
+ * will return already-added prefix route via interface that
+ * prefix route was assigned to, which might be non-loopback.
+ */
+ err = -EINVAL;
+ if (ipv6_chk_addr_and_flags(net, gw_addr, NULL, 0, 0))
+ goto out;
+
rt->rt6i_gateway = *gw_addr;
gwa_type = ipv6_addr_type(gw_addr);
@@ -1664,7 +1647,6 @@ int ip6_route_add(struct fib6_config *cfg)
(SIT, PtP, NBMA NOARP links) it is handy to allow
some exceptions. --ANK
*/
- err = -EINVAL;
if (!(gwa_type & IPV6_ADDR_UNICAST))
goto out;
@@ -1785,6 +1767,9 @@ static int ip6_route_del(struct fib6_config *cfg)
if (fn) {
for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
+ if ((rt->rt6i_flags & RTF_CACHE) &&
+ !(cfg->fc_flags & RTF_CACHE))
+ continue;
if (cfg->fc_ifindex &&
(!rt->dst.dev ||
rt->dst.dev->ifindex != cfg->fc_ifindex))
@@ -1926,12 +1911,27 @@ out:
* Misc support functions
*/
+static void rt6_set_from(struct rt6_info *rt, struct rt6_info *from)
+{
+ BUG_ON(from->dst.from);
+
+ rt->rt6i_flags &= ~RTF_EXPIRES;
+ dst_hold(&from->dst);
+ rt->dst.from = &from->dst;
+ dst_init_metrics(&rt->dst, dst_metrics_ptr(&from->dst), true);
+}
+
static struct rt6_info *ip6_rt_copy(struct rt6_info *ort,
const struct in6_addr *dest)
{
struct net *net = dev_net(ort->dst.dev);
- struct rt6_info *rt = ip6_dst_alloc(net, ort->dst.dev, 0,
- ort->rt6i_table);
+ struct rt6_info *rt;
+
+ if (ort->rt6i_flags & RTF_CACHE)
+ ort = (struct rt6_info *)ort->dst.from;
+
+ rt = ip6_dst_alloc(net, ort->dst.dev, 0,
+ ort->rt6i_table);
if (rt) {
rt->dst.input = ort->dst.input;
@@ -1940,7 +1940,6 @@ static struct rt6_info *ip6_rt_copy(struct rt6_info *ort,
rt->rt6i_dst.addr = *dest;
rt->rt6i_dst.plen = 128;
- dst_copy_metrics(&rt->dst, &ort->dst);
rt->dst.error = ort->dst.error;
rt->rt6i_idev = ort->rt6i_idev;
if (rt->rt6i_idev)
@@ -2245,9 +2244,10 @@ int ip6_route_get_saddr(struct net *net,
unsigned int prefs,
struct in6_addr *saddr)
{
- struct inet6_dev *idev = ip6_dst_idev((struct dst_entry *)rt);
+ struct inet6_dev *idev =
+ rt ? ip6_dst_idev((struct dst_entry *)rt) : NULL;
int err = 0;
- if (rt->rt6i_prefsrc.plen)
+ if (rt && rt->rt6i_prefsrc.plen)
*saddr = rt->rt6i_prefsrc.addr;
else
err = ipv6_dev_get_saddr(net, idev ? idev->dev : NULL,
@@ -2372,11 +2372,20 @@ static int rt6_mtu_change_route(struct rt6_info *rt, void *p_arg)
PMTU discouvery.
*/
if (rt->dst.dev == arg->dev &&
- !dst_metric_locked(&rt->dst, RTAX_MTU) &&
- (dst_mtu(&rt->dst) >= arg->mtu ||
- (dst_mtu(&rt->dst) < arg->mtu &&
- dst_mtu(&rt->dst) == idev->cnf.mtu6))) {
- dst_metric_set(&rt->dst, RTAX_MTU, arg->mtu);
+ !dst_metric_locked(&rt->dst, RTAX_MTU)) {
+ if (rt->rt6i_flags & RTF_CACHE) {
+ /* For RTF_CACHE with rt6i_pmtu == 0
+ * (i.e. a redirected route),
+ * the metrics of its rt->dst.from has already
+ * been updated.
+ */
+ if (rt->rt6i_pmtu && rt->rt6i_pmtu > arg->mtu)
+ rt->rt6i_pmtu = arg->mtu;
+ } else if (dst_mtu(&rt->dst) >= arg->mtu ||
+ (dst_mtu(&rt->dst) < arg->mtu &&
+ dst_mtu(&rt->dst) == idev->cnf.mtu6)) {
+ dst_metric_set(&rt->dst, RTAX_MTU, arg->mtu);
+ }
}
return 0;
}
@@ -2433,6 +2442,9 @@ static int rtm_to_fib6_config(struct sk_buff *skb, struct nlmsghdr *nlh,
if (rtm->rtm_type == RTN_LOCAL)
cfg->fc_flags |= RTF_LOCAL;
+ if (rtm->rtm_flags & RTM_F_CLONED)
+ cfg->fc_flags |= RTF_CACHE;
+
cfg->fc_nlinfo.portid = NETLINK_CB(skb).portid;
cfg->fc_nlinfo.nlh = nlh;
cfg->fc_nlinfo.nl_net = sock_net(skb->sk);
@@ -2503,9 +2515,9 @@ static int ip6_route_multipath(struct fib6_config *cfg, int add)
int attrlen;
int err = 0, last_err = 0;
+ remaining = cfg->fc_mp_len;
beginning:
rtnh = (struct rtnexthop *)cfg->fc_mp;
- remaining = cfg->fc_mp_len;
/* Parse a Multipath Entry */
while (rtnh_ok(rtnh, remaining)) {
@@ -2535,15 +2547,19 @@ beginning:
* next hops that have been already added.
*/
add = 0;
+ remaining = cfg->fc_mp_len - remaining;
goto beginning;
}
}
/* Because each route is added like a single route we remove
- * this flag after the first nexthop (if there is a collision,
- * we have already fail to add the first nexthop:
- * fib6_add_rt2node() has reject it).
+ * these flags after the first nexthop: if there is a collision,
+ * we have already failed to add the first nexthop:
+ * fib6_add_rt2node() has rejected it; when replacing, old
+ * nexthops have been replaced by first new, the rest should
+ * be added to it.
*/
- cfg->fc_nlinfo.nlh->nlmsg_flags &= ~NLM_F_EXCL;
+ cfg->fc_nlinfo.nlh->nlmsg_flags &= ~(NLM_F_EXCL |
+ NLM_F_REPLACE);
rtnh = rtnh_next(rtnh, &remaining);
}
@@ -2603,6 +2619,7 @@ static int rt6_fill_node(struct net *net,
int iif, int type, u32 portid, u32 seq,
int prefix, int nowait, unsigned int flags)
{
+ u32 metrics[RTAX_MAX];
struct rtmsg *rtm;
struct nlmsghdr *nlh;
long expires;
@@ -2716,7 +2733,10 @@ static int rt6_fill_node(struct net *net,
goto nla_put_failure;
}
- if (rtnetlink_put_metrics(skb, dst_metrics_ptr(&rt->dst)) < 0)
+ memcpy(metrics, dst_metrics_ptr(&rt->dst), sizeof(metrics));
+ if (rt->rt6i_pmtu)
+ metrics[RTAX_MTU - 1] = rt->rt6i_pmtu;
+ if (rtnetlink_put_metrics(skb, metrics) < 0)
goto nla_put_failure;
if (rt->rt6i_flags & RTF_GATEWAY) {
diff --git a/net/ipv6/sysctl_net_ipv6.c b/net/ipv6/sysctl_net_ipv6.c
index abcc79f649b3..4e705add4f18 100644
--- a/net/ipv6/sysctl_net_ipv6.c
+++ b/net/ipv6/sysctl_net_ipv6.c
@@ -68,6 +68,13 @@ static struct ctl_table ipv6_table_template[] = {
.mode = 0644,
.proc_handler = proc_dointvec_jiffies,
},
+ {
+ .procname = "flowlabel_state_ranges",
+ .data = &init_net.ipv6.sysctl.flowlabel_state_ranges,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec
+ },
{ }
};
@@ -109,6 +116,7 @@ static int __net_init ipv6_sysctl_net_init(struct net *net)
ipv6_table[4].data = &net->ipv6.sysctl.fwmark_reflect;
ipv6_table[5].data = &net->ipv6.sysctl.idgen_retries;
ipv6_table[6].data = &net->ipv6.sysctl.idgen_delay;
+ ipv6_table[7].data = &net->ipv6.sysctl.flowlabel_state_ranges;
ipv6_route_table = ipv6_route_sysctl_init(net);
if (!ipv6_route_table)
diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
index b6575d665568..fefe4455e1f3 100644
--- a/net/ipv6/tcp_ipv6.c
+++ b/net/ipv6/tcp_ipv6.c
@@ -914,7 +914,7 @@ static void tcp_v6_timewait_ack(struct sock *sk, struct sk_buff *skb)
tcptw->tw_rcv_wnd >> tw->tw_rcv_wscale,
tcp_time_stamp + tcptw->tw_ts_offset,
tcptw->tw_ts_recent, tw->tw_bound_dev_if, tcp_twsk_md5_key(tcptw),
- tw->tw_tclass, (tw->tw_flowlabel << 12));
+ tw->tw_tclass, cpu_to_be32(tw->tw_flowlabel));
inet_twsk_put(tw);
}
@@ -1421,6 +1421,7 @@ process:
skb->dev = NULL;
bh_lock_sock_nested(sk);
+ tcp_sk(sk)->segs_in += max_t(u16, 1, skb_shinfo(skb)->gso_segs);
ret = 0;
if (!sock_owned_by_user(sk)) {
if (!tcp_prequeue(sk, skb))
diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c
index 3477c919fcc8..c2ec41617a35 100644
--- a/net/ipv6/udp.c
+++ b/net/ipv6/udp.c
@@ -731,7 +731,9 @@ static bool __udp_v6_is_mcast_sock(struct net *net, struct sock *sk,
(inet->inet_dport && inet->inet_dport != rmt_port) ||
(!ipv6_addr_any(&sk->sk_v6_daddr) &&
!ipv6_addr_equal(&sk->sk_v6_daddr, rmt_addr)) ||
- (sk->sk_bound_dev_if && sk->sk_bound_dev_if != dif))
+ (sk->sk_bound_dev_if && sk->sk_bound_dev_if != dif) ||
+ (!ipv6_addr_any(&sk->sk_v6_rcv_saddr) &&
+ !ipv6_addr_equal(&sk->sk_v6_rcv_saddr, loc_addr)))
return false;
if (!inet6_mc_check(sk, loc_addr, rmt_addr))
return false;
diff --git a/net/ipv6/xfrm6_policy.c b/net/ipv6/xfrm6_policy.c
index f337a908a76a..6ae256b4c55a 100644
--- a/net/ipv6/xfrm6_policy.c
+++ b/net/ipv6/xfrm6_policy.c
@@ -71,13 +71,6 @@ static int xfrm6_get_tos(const struct flowi *fl)
return 0;
}
-static void xfrm6_init_dst(struct net *net, struct xfrm_dst *xdst)
-{
- struct rt6_info *rt = (struct rt6_info *)xdst;
-
- rt6_init_peer(rt, net->ipv6.peers);
-}
-
static int xfrm6_init_path(struct xfrm_dst *path, struct dst_entry *dst,
int nfheader_len)
{
@@ -106,8 +99,6 @@ static int xfrm6_fill_dst(struct xfrm_dst *xdst, struct net_device *dev,
return -ENODEV;
}
- rt6_transfer_peer(&xdst->u.rt6, rt);
-
/* Sheit... I remember I did this right. Apparently,
* it was magically lost, so this code needs audit */
xdst->u.rt6.rt6i_flags = rt->rt6i_flags & (RTF_ANYCAST |
@@ -255,10 +246,6 @@ static void xfrm6_dst_destroy(struct dst_entry *dst)
if (likely(xdst->u.rt6.rt6i_idev))
in6_dev_put(xdst->u.rt6.rt6i_idev);
dst_destroy_metrics_generic(dst);
- if (rt6_has_peer(&xdst->u.rt6)) {
- struct inet_peer *peer = rt6_peer_ptr(&xdst->u.rt6);
- inet_putpeer(peer);
- }
xfrm_dst_destroy(xdst);
}
@@ -308,7 +295,6 @@ static struct xfrm_policy_afinfo xfrm6_policy_afinfo = {
.get_saddr = xfrm6_get_saddr,
.decode_session = _decode_session6,
.get_tos = xfrm6_get_tos,
- .init_dst = xfrm6_init_dst,
.init_path = xfrm6_init_path,
.fill_dst = xfrm6_fill_dst,
.blackhole_route = ip6_blackhole_route,
diff --git a/net/ipx/af_ipx.c b/net/ipx/af_ipx.c
index 4ea5d7497b5f..48d0dc89b58d 100644
--- a/net/ipx/af_ipx.c
+++ b/net/ipx/af_ipx.c
@@ -1347,7 +1347,7 @@ static int ipx_create(struct net *net, struct socket *sock, int protocol,
goto out;
rc = -ENOMEM;
- sk = sk_alloc(net, PF_IPX, GFP_KERNEL, &ipx_proto);
+ sk = sk_alloc(net, PF_IPX, GFP_KERNEL, &ipx_proto, kern);
if (!sk)
goto out;
diff --git a/net/irda/af_irda.c b/net/irda/af_irda.c
index ee0ea25c8e7a..fae6822cc367 100644
--- a/net/irda/af_irda.c
+++ b/net/irda/af_irda.c
@@ -1100,7 +1100,7 @@ static int irda_create(struct net *net, struct socket *sock, int protocol,
}
/* Allocate networking socket */
- sk = sk_alloc(net, PF_IRDA, GFP_KERNEL, &irda_proto);
+ sk = sk_alloc(net, PF_IRDA, GFP_KERNEL, &irda_proto, kern);
if (sk == NULL)
return -ENOMEM;
diff --git a/net/iucv/af_iucv.c b/net/iucv/af_iucv.c
index 6daa52a18d40..918151c11348 100644
--- a/net/iucv/af_iucv.c
+++ b/net/iucv/af_iucv.c
@@ -535,12 +535,12 @@ static void iucv_sock_init(struct sock *sk, struct sock *parent)
sk->sk_type = parent->sk_type;
}
-static struct sock *iucv_sock_alloc(struct socket *sock, int proto, gfp_t prio)
+static struct sock *iucv_sock_alloc(struct socket *sock, int proto, gfp_t prio, int kern)
{
struct sock *sk;
struct iucv_sock *iucv;
- sk = sk_alloc(&init_net, PF_IUCV, prio, &iucv_proto);
+ sk = sk_alloc(&init_net, PF_IUCV, prio, &iucv_proto, kern);
if (!sk)
return NULL;
iucv = iucv_sk(sk);
@@ -602,7 +602,7 @@ static int iucv_sock_create(struct net *net, struct socket *sock, int protocol,
return -ESOCKTNOSUPPORT;
}
- sk = iucv_sock_alloc(sock, protocol, GFP_KERNEL);
+ sk = iucv_sock_alloc(sock, protocol, GFP_KERNEL, kern);
if (!sk)
return -ENOMEM;
@@ -1723,7 +1723,7 @@ static int iucv_callback_connreq(struct iucv_path *path,
}
/* Create the new socket */
- nsk = iucv_sock_alloc(NULL, sk->sk_type, GFP_ATOMIC);
+ nsk = iucv_sock_alloc(NULL, sk->sk_type, GFP_ATOMIC, 0);
if (!nsk) {
err = pr_iucv->path_sever(path, user_data);
iucv_path_free(path);
@@ -1933,7 +1933,7 @@ static int afiucv_hs_callback_syn(struct sock *sk, struct sk_buff *skb)
goto out;
}
- nsk = iucv_sock_alloc(NULL, sk->sk_type, GFP_ATOMIC);
+ nsk = iucv_sock_alloc(NULL, sk->sk_type, GFP_ATOMIC, 0);
bh_lock_sock(sk);
if ((sk->sk_state != IUCV_LISTEN) ||
sk_acceptq_is_full(sk) ||
diff --git a/net/key/af_key.c b/net/key/af_key.c
index f0d52d721b3a..9e834ec475a9 100644
--- a/net/key/af_key.c
+++ b/net/key/af_key.c
@@ -149,7 +149,7 @@ static int pfkey_create(struct net *net, struct socket *sock, int protocol,
return -EPROTONOSUPPORT;
err = -ENOMEM;
- sk = sk_alloc(net, PF_KEY, GFP_KERNEL, &key_proto);
+ sk = sk_alloc(net, PF_KEY, GFP_KERNEL, &key_proto, kern);
if (sk == NULL)
goto out;
diff --git a/net/l2tp/l2tp_core.c b/net/l2tp/l2tp_core.c
index a29a504492af..f6b090df3930 100644
--- a/net/l2tp/l2tp_core.c
+++ b/net/l2tp/l2tp_core.c
@@ -1334,9 +1334,10 @@ static void l2tp_tunnel_del_work(struct work_struct *work)
if (sock)
inet_shutdown(sock, 2);
} else {
- if (sock)
+ if (sock) {
kernel_sock_shutdown(sock, SHUT_RDWR);
- sk_release_kernel(sk);
+ sock_release(sock);
+ }
}
l2tp_tunnel_sock_put(sk);
@@ -1399,13 +1400,11 @@ static int l2tp_tunnel_sock_create(struct net *net,
if (cfg->local_ip6 && cfg->peer_ip6) {
struct sockaddr_l2tpip6 ip6_addr = {0};
- err = sock_create_kern(AF_INET6, SOCK_DGRAM,
+ err = sock_create_kern(net, AF_INET6, SOCK_DGRAM,
IPPROTO_L2TP, &sock);
if (err < 0)
goto out;
- sk_change_net(sock->sk, net);
-
ip6_addr.l2tp_family = AF_INET6;
memcpy(&ip6_addr.l2tp_addr, cfg->local_ip6,
sizeof(ip6_addr.l2tp_addr));
@@ -1429,13 +1428,11 @@ static int l2tp_tunnel_sock_create(struct net *net,
{
struct sockaddr_l2tpip ip_addr = {0};
- err = sock_create_kern(AF_INET, SOCK_DGRAM,
+ err = sock_create_kern(net, AF_INET, SOCK_DGRAM,
IPPROTO_L2TP, &sock);
if (err < 0)
goto out;
- sk_change_net(sock->sk, net);
-
ip_addr.l2tp_family = AF_INET;
ip_addr.l2tp_addr = cfg->local_ip;
ip_addr.l2tp_conn_id = tunnel_id;
@@ -1462,7 +1459,7 @@ out:
*sockp = sock;
if ((err < 0) && sock) {
kernel_sock_shutdown(sock, SHUT_RDWR);
- sk_release_kernel(sock->sk);
+ sock_release(sock);
*sockp = NULL;
}
diff --git a/net/l2tp/l2tp_ppp.c b/net/l2tp/l2tp_ppp.c
index e9b0dec56b8e..f56c9f69e9f2 100644
--- a/net/l2tp/l2tp_ppp.c
+++ b/net/l2tp/l2tp_ppp.c
@@ -542,12 +542,12 @@ static int pppol2tp_backlog_recv(struct sock *sk, struct sk_buff *skb)
/* socket() handler. Initialize a new struct sock.
*/
-static int pppol2tp_create(struct net *net, struct socket *sock)
+static int pppol2tp_create(struct net *net, struct socket *sock, int kern)
{
int error = -ENOMEM;
struct sock *sk;
- sk = sk_alloc(net, PF_PPPOX, GFP_KERNEL, &pppol2tp_sk_proto);
+ sk = sk_alloc(net, PF_PPPOX, GFP_KERNEL, &pppol2tp_sk_proto, kern);
if (!sk)
goto out;
diff --git a/net/llc/af_llc.c b/net/llc/af_llc.c
index 17a8dff06090..8fd9febaa5ba 100644
--- a/net/llc/af_llc.c
+++ b/net/llc/af_llc.c
@@ -168,7 +168,7 @@ static int llc_ui_create(struct net *net, struct socket *sock, int protocol,
if (likely(sock->type == SOCK_DGRAM || sock->type == SOCK_STREAM)) {
rc = -ENOMEM;
- sk = llc_sk_alloc(net, PF_LLC, GFP_KERNEL, &llc_proto);
+ sk = llc_sk_alloc(net, PF_LLC, GFP_KERNEL, &llc_proto, kern);
if (sk) {
rc = 0;
llc_ui_sk_init(sock, sk);
diff --git a/net/llc/llc_conn.c b/net/llc/llc_conn.c
index 81a61fce3afb..3e821daf9dd4 100644
--- a/net/llc/llc_conn.c
+++ b/net/llc/llc_conn.c
@@ -768,7 +768,7 @@ static struct sock *llc_create_incoming_sock(struct sock *sk,
struct llc_addr *daddr)
{
struct sock *newsk = llc_sk_alloc(sock_net(sk), sk->sk_family, GFP_ATOMIC,
- sk->sk_prot);
+ sk->sk_prot, 0);
struct llc_sock *newllc, *llc = llc_sk(sk);
if (!newsk)
@@ -931,9 +931,9 @@ static void llc_sk_init(struct sock *sk)
* Allocates a LLC sock and initializes it. Returns the new LLC sock
* or %NULL if there's no memory available for one
*/
-struct sock *llc_sk_alloc(struct net *net, int family, gfp_t priority, struct proto *prot)
+struct sock *llc_sk_alloc(struct net *net, int family, gfp_t priority, struct proto *prot, int kern)
{
- struct sock *sk = sk_alloc(net, family, priority, prot);
+ struct sock *sk = sk_alloc(net, family, priority, prot, kern);
if (!sk)
goto out;
diff --git a/net/mac80211/Kconfig b/net/mac80211/Kconfig
index 64a012a0c6e5..086de496a4c1 100644
--- a/net/mac80211/Kconfig
+++ b/net/mac80211/Kconfig
@@ -302,6 +302,20 @@ config MAC80211_DEBUG_COUNTERS
---help---
Selecting this option causes mac80211 to keep additional
and very verbose statistics about TX and RX handler use
- and show them in debugfs.
+ as well as a few selected dot11 counters. These will be
+ exposed in debugfs.
+
+ Note that some of the counters are not concurrency safe
+ and may thus not always be accurate.
If unsure, say N.
+
+config MAC80211_STA_HASH_MAX_SIZE
+ int "Station hash table maximum size" if MAC80211_DEBUG_MENU
+ default 0
+ ---help---
+ Setting this option to a low value (e.g. 4) allows testing the
+ hash table with collisions relatively deterministically (just
+ connect more stations than the number selected here.)
+
+ If unsure, leave the default of 0.
diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c
index 265e42721a66..3469bbdc891c 100644
--- a/net/mac80211/cfg.c
+++ b/net/mac80211/cfg.c
@@ -137,6 +137,9 @@ static int ieee80211_set_noack_map(struct wiphy *wiphy,
struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev);
sdata->noack_map = noack_map;
+
+ ieee80211_check_fast_xmit_iface(sdata);
+
return 0;
}
@@ -309,6 +312,7 @@ static int ieee80211_get_key(struct wiphy *wiphy, struct net_device *dev,
u32 iv32;
u16 iv16;
int err = -ENOENT;
+ struct ieee80211_key_seq kseq = {};
sdata = IEEE80211_DEV_TO_SUB_IF(dev);
@@ -339,10 +343,12 @@ static int ieee80211_get_key(struct wiphy *wiphy, struct net_device *dev,
iv32 = key->u.tkip.tx.iv32;
iv16 = key->u.tkip.tx.iv16;
- if (key->flags & KEY_FLAG_UPLOADED_TO_HARDWARE)
- drv_get_tkip_seq(sdata->local,
- key->conf.hw_key_idx,
- &iv32, &iv16);
+ if (key->flags & KEY_FLAG_UPLOADED_TO_HARDWARE &&
+ !(key->conf.flags & IEEE80211_KEY_FLAG_GENERATE_IV)) {
+ drv_get_key_seq(sdata->local, key, &kseq);
+ iv32 = kseq.tkip.iv32;
+ iv16 = kseq.tkip.iv16;
+ }
seq[0] = iv16 & 0xff;
seq[1] = (iv16 >> 8) & 0xff;
@@ -355,52 +361,85 @@ static int ieee80211_get_key(struct wiphy *wiphy, struct net_device *dev,
break;
case WLAN_CIPHER_SUITE_CCMP:
case WLAN_CIPHER_SUITE_CCMP_256:
- pn64 = atomic64_read(&key->u.ccmp.tx_pn);
- seq[0] = pn64;
- seq[1] = pn64 >> 8;
- seq[2] = pn64 >> 16;
- seq[3] = pn64 >> 24;
- seq[4] = pn64 >> 32;
- seq[5] = pn64 >> 40;
+ if (key->flags & KEY_FLAG_UPLOADED_TO_HARDWARE &&
+ !(key->conf.flags & IEEE80211_KEY_FLAG_GENERATE_IV)) {
+ drv_get_key_seq(sdata->local, key, &kseq);
+ memcpy(seq, kseq.ccmp.pn, 6);
+ } else {
+ pn64 = atomic64_read(&key->u.ccmp.tx_pn);
+ seq[0] = pn64;
+ seq[1] = pn64 >> 8;
+ seq[2] = pn64 >> 16;
+ seq[3] = pn64 >> 24;
+ seq[4] = pn64 >> 32;
+ seq[5] = pn64 >> 40;
+ }
params.seq = seq;
params.seq_len = 6;
break;
case WLAN_CIPHER_SUITE_AES_CMAC:
case WLAN_CIPHER_SUITE_BIP_CMAC_256:
- pn64 = atomic64_read(&key->u.aes_cmac.tx_pn);
- seq[0] = pn64;
- seq[1] = pn64 >> 8;
- seq[2] = pn64 >> 16;
- seq[3] = pn64 >> 24;
- seq[4] = pn64 >> 32;
- seq[5] = pn64 >> 40;
+ if (key->flags & KEY_FLAG_UPLOADED_TO_HARDWARE &&
+ !(key->conf.flags & IEEE80211_KEY_FLAG_GENERATE_IV)) {
+ drv_get_key_seq(sdata->local, key, &kseq);
+ memcpy(seq, kseq.aes_cmac.pn, 6);
+ } else {
+ pn64 = atomic64_read(&key->u.aes_cmac.tx_pn);
+ seq[0] = pn64;
+ seq[1] = pn64 >> 8;
+ seq[2] = pn64 >> 16;
+ seq[3] = pn64 >> 24;
+ seq[4] = pn64 >> 32;
+ seq[5] = pn64 >> 40;
+ }
params.seq = seq;
params.seq_len = 6;
break;
case WLAN_CIPHER_SUITE_BIP_GMAC_128:
case WLAN_CIPHER_SUITE_BIP_GMAC_256:
- pn64 = atomic64_read(&key->u.aes_gmac.tx_pn);
- seq[0] = pn64;
- seq[1] = pn64 >> 8;
- seq[2] = pn64 >> 16;
- seq[3] = pn64 >> 24;
- seq[4] = pn64 >> 32;
- seq[5] = pn64 >> 40;
+ if (key->flags & KEY_FLAG_UPLOADED_TO_HARDWARE &&
+ !(key->conf.flags & IEEE80211_KEY_FLAG_GENERATE_IV)) {
+ drv_get_key_seq(sdata->local, key, &kseq);
+ memcpy(seq, kseq.aes_gmac.pn, 6);
+ } else {
+ pn64 = atomic64_read(&key->u.aes_gmac.tx_pn);
+ seq[0] = pn64;
+ seq[1] = pn64 >> 8;
+ seq[2] = pn64 >> 16;
+ seq[3] = pn64 >> 24;
+ seq[4] = pn64 >> 32;
+ seq[5] = pn64 >> 40;
+ }
params.seq = seq;
params.seq_len = 6;
break;
case WLAN_CIPHER_SUITE_GCMP:
case WLAN_CIPHER_SUITE_GCMP_256:
- pn64 = atomic64_read(&key->u.gcmp.tx_pn);
- seq[0] = pn64;
- seq[1] = pn64 >> 8;
- seq[2] = pn64 >> 16;
- seq[3] = pn64 >> 24;
- seq[4] = pn64 >> 32;
- seq[5] = pn64 >> 40;
+ if (key->flags & KEY_FLAG_UPLOADED_TO_HARDWARE &&
+ !(key->conf.flags & IEEE80211_KEY_FLAG_GENERATE_IV)) {
+ drv_get_key_seq(sdata->local, key, &kseq);
+ memcpy(seq, kseq.gcmp.pn, 6);
+ } else {
+ pn64 = atomic64_read(&key->u.gcmp.tx_pn);
+ seq[0] = pn64;
+ seq[1] = pn64 >> 8;
+ seq[2] = pn64 >> 16;
+ seq[3] = pn64 >> 24;
+ seq[4] = pn64 >> 32;
+ seq[5] = pn64 >> 40;
+ }
params.seq = seq;
params.seq_len = 6;
break;
+ default:
+ if (!(key->flags & KEY_FLAG_UPLOADED_TO_HARDWARE))
+ break;
+ if (WARN_ON(key->conf.flags & IEEE80211_KEY_FLAG_GENERATE_IV))
+ break;
+ drv_get_key_seq(sdata->local, key, &kseq);
+ params.seq = kseq.hw.seq;
+ params.seq_len = kseq.hw.seq_len;
+ break;
}
params.key = key->conf.key;
@@ -2099,10 +2138,14 @@ static int ieee80211_set_wiphy_params(struct wiphy *wiphy, u32 changed)
int err;
if (changed & WIPHY_PARAM_FRAG_THRESHOLD) {
+ ieee80211_check_fast_xmit_all(local);
+
err = drv_set_frag_threshold(local, wiphy->frag_threshold);
- if (err)
+ if (err) {
+ ieee80211_check_fast_xmit_all(local);
return err;
+ }
}
if ((changed & WIPHY_PARAM_COVERAGE_CLASS) ||
@@ -3336,8 +3379,14 @@ static int ieee80211_mgmt_tx(struct wiphy *wiphy, struct wireless_dev *wdev,
break;
case NL80211_IFTYPE_STATION:
case NL80211_IFTYPE_P2P_CLIENT:
- if (!sdata->u.mgd.associated)
+ sdata_lock(sdata);
+ if (!sdata->u.mgd.associated ||
+ (params->offchan && params->wait &&
+ local->ops->remain_on_channel &&
+ memcmp(sdata->u.mgd.associated->bssid,
+ mgmt->bssid, ETH_ALEN)))
need_offchan = true;
+ sdata_unlock(sdata);
break;
case NL80211_IFTYPE_P2P_DEVICE:
need_offchan = true;
diff --git a/net/mac80211/chan.c b/net/mac80211/chan.c
index 5bcd4e5589d3..f01c18a3160e 100644
--- a/net/mac80211/chan.c
+++ b/net/mac80211/chan.c
@@ -664,6 +664,8 @@ out:
ieee80211_bss_info_change_notify(sdata,
BSS_CHANGED_IDLE);
+ ieee80211_check_fast_xmit_iface(sdata);
+
return ret;
}
@@ -1008,6 +1010,8 @@ ieee80211_vif_use_reserved_reassign(struct ieee80211_sub_if_data *sdata)
if (WARN_ON(!chandef))
return -EINVAL;
+ ieee80211_change_chanctx(local, new_ctx, chandef);
+
vif_chsw[0].vif = &sdata->vif;
vif_chsw[0].old_ctx = &old_ctx->conf;
vif_chsw[0].new_ctx = &new_ctx->conf;
@@ -1030,6 +1034,8 @@ ieee80211_vif_use_reserved_reassign(struct ieee80211_sub_if_data *sdata)
if (sdata->vif.type == NL80211_IFTYPE_AP)
__ieee80211_vif_copy_chanctx_to_vlans(sdata, false);
+ ieee80211_check_fast_xmit_iface(sdata);
+
if (ieee80211_chanctx_refcount(local, old_ctx) == 0)
ieee80211_free_chanctx(local, old_ctx);
@@ -1079,6 +1085,8 @@ ieee80211_vif_use_reserved_assign(struct ieee80211_sub_if_data *sdata)
if (WARN_ON(!chandef))
return -EINVAL;
+ ieee80211_change_chanctx(local, new_ctx, chandef);
+
list_del(&sdata->reserved_chanctx_list);
sdata->reserved_chanctx = NULL;
@@ -1376,6 +1384,8 @@ static int ieee80211_vif_use_reserved_switch(struct ieee80211_local *local)
__ieee80211_vif_copy_chanctx_to_vlans(sdata,
false);
+ ieee80211_check_fast_xmit_iface(sdata);
+
sdata->radar_required = sdata->reserved_radar_required;
if (sdata->vif.bss_conf.chandef.width !=
diff --git a/net/mac80211/debugfs.c b/net/mac80211/debugfs.c
index 23813ebb349c..b17206db49b4 100644
--- a/net/mac80211/debugfs.c
+++ b/net/mac80211/debugfs.c
@@ -219,8 +219,8 @@ static const struct file_operations stats_ ##name## _ops = { \
.llseek = generic_file_llseek, \
};
-#define DEBUGFS_STATS_ADD(name, field) \
- debugfs_create_u32(#name, 0400, statsd, (u32 *) &field);
+#define DEBUGFS_STATS_ADD(name) \
+ debugfs_create_u32(#name, 0400, statsd, &local->name);
#define DEBUGFS_DEVSTATS_ADD(name) \
debugfs_create_file(#name, 0400, statsd, local, &stats_ ##name## _ops);
@@ -255,53 +255,31 @@ void debugfs_hw_add(struct ieee80211_local *local)
if (!statsd)
return;
- DEBUGFS_STATS_ADD(transmitted_fragment_count,
- local->dot11TransmittedFragmentCount);
- DEBUGFS_STATS_ADD(multicast_transmitted_frame_count,
- local->dot11MulticastTransmittedFrameCount);
- DEBUGFS_STATS_ADD(failed_count, local->dot11FailedCount);
- DEBUGFS_STATS_ADD(retry_count, local->dot11RetryCount);
- DEBUGFS_STATS_ADD(multiple_retry_count,
- local->dot11MultipleRetryCount);
- DEBUGFS_STATS_ADD(frame_duplicate_count,
- local->dot11FrameDuplicateCount);
- DEBUGFS_STATS_ADD(received_fragment_count,
- local->dot11ReceivedFragmentCount);
- DEBUGFS_STATS_ADD(multicast_received_frame_count,
- local->dot11MulticastReceivedFrameCount);
- DEBUGFS_STATS_ADD(transmitted_frame_count,
- local->dot11TransmittedFrameCount);
#ifdef CONFIG_MAC80211_DEBUG_COUNTERS
- DEBUGFS_STATS_ADD(tx_handlers_drop, local->tx_handlers_drop);
- DEBUGFS_STATS_ADD(tx_handlers_queued, local->tx_handlers_queued);
- DEBUGFS_STATS_ADD(tx_handlers_drop_fragment,
- local->tx_handlers_drop_fragment);
- DEBUGFS_STATS_ADD(tx_handlers_drop_wep,
- local->tx_handlers_drop_wep);
- DEBUGFS_STATS_ADD(tx_handlers_drop_not_assoc,
- local->tx_handlers_drop_not_assoc);
- DEBUGFS_STATS_ADD(tx_handlers_drop_unauth_port,
- local->tx_handlers_drop_unauth_port);
- DEBUGFS_STATS_ADD(rx_handlers_drop, local->rx_handlers_drop);
- DEBUGFS_STATS_ADD(rx_handlers_queued, local->rx_handlers_queued);
- DEBUGFS_STATS_ADD(rx_handlers_drop_nullfunc,
- local->rx_handlers_drop_nullfunc);
- DEBUGFS_STATS_ADD(rx_handlers_drop_defrag,
- local->rx_handlers_drop_defrag);
- DEBUGFS_STATS_ADD(rx_handlers_drop_short,
- local->rx_handlers_drop_short);
- DEBUGFS_STATS_ADD(tx_expand_skb_head,
- local->tx_expand_skb_head);
- DEBUGFS_STATS_ADD(tx_expand_skb_head_cloned,
- local->tx_expand_skb_head_cloned);
- DEBUGFS_STATS_ADD(rx_expand_skb_head,
- local->rx_expand_skb_head);
- DEBUGFS_STATS_ADD(rx_expand_skb_head2,
- local->rx_expand_skb_head2);
- DEBUGFS_STATS_ADD(rx_handlers_fragments,
- local->rx_handlers_fragments);
- DEBUGFS_STATS_ADD(tx_status_drop,
- local->tx_status_drop);
+ DEBUGFS_STATS_ADD(dot11TransmittedFragmentCount);
+ DEBUGFS_STATS_ADD(dot11MulticastTransmittedFrameCount);
+ DEBUGFS_STATS_ADD(dot11FailedCount);
+ DEBUGFS_STATS_ADD(dot11RetryCount);
+ DEBUGFS_STATS_ADD(dot11MultipleRetryCount);
+ DEBUGFS_STATS_ADD(dot11FrameDuplicateCount);
+ DEBUGFS_STATS_ADD(dot11ReceivedFragmentCount);
+ DEBUGFS_STATS_ADD(dot11MulticastReceivedFrameCount);
+ DEBUGFS_STATS_ADD(dot11TransmittedFrameCount);
+ DEBUGFS_STATS_ADD(tx_handlers_drop);
+ DEBUGFS_STATS_ADD(tx_handlers_queued);
+ DEBUGFS_STATS_ADD(tx_handlers_drop_wep);
+ DEBUGFS_STATS_ADD(tx_handlers_drop_not_assoc);
+ DEBUGFS_STATS_ADD(tx_handlers_drop_unauth_port);
+ DEBUGFS_STATS_ADD(rx_handlers_drop);
+ DEBUGFS_STATS_ADD(rx_handlers_queued);
+ DEBUGFS_STATS_ADD(rx_handlers_drop_nullfunc);
+ DEBUGFS_STATS_ADD(rx_handlers_drop_defrag);
+ DEBUGFS_STATS_ADD(rx_handlers_drop_short);
+ DEBUGFS_STATS_ADD(tx_expand_skb_head);
+ DEBUGFS_STATS_ADD(tx_expand_skb_head_cloned);
+ DEBUGFS_STATS_ADD(rx_expand_skb_head_defrag);
+ DEBUGFS_STATS_ADD(rx_handlers_fragments);
+ DEBUGFS_STATS_ADD(tx_status_drop);
#endif
DEBUGFS_DEVSTATS_ADD(dot11ACKFailureCount);
DEBUGFS_DEVSTATS_ADD(dot11RTSFailureCount);
diff --git a/net/mac80211/debugfs_sta.c b/net/mac80211/debugfs_sta.c
index 252859e90e8a..06d52935036d 100644
--- a/net/mac80211/debugfs_sta.c
+++ b/net/mac80211/debugfs_sta.c
@@ -29,8 +29,6 @@ static ssize_t sta_ ##name## _read(struct file *file, \
format_string, sta->field); \
}
#define STA_READ_D(name, field) STA_READ(name, field, "%d\n")
-#define STA_READ_U(name, field) STA_READ(name, field, "%u\n")
-#define STA_READ_S(name, field) STA_READ(name, field, "%s\n")
#define STA_OPS(name) \
static const struct file_operations sta_ ##name## _ops = { \
@@ -52,10 +50,7 @@ static const struct file_operations sta_ ##name## _ops = { \
STA_OPS(name)
STA_FILE(aid, sta.aid, D);
-STA_FILE(dev, sdata->name, S);
-STA_FILE(last_signal, last_signal, D);
STA_FILE(last_ack_signal, last_ack_signal, D);
-STA_FILE(beacon_loss_count, beacon_loss_count, D);
static ssize_t sta_flags_read(struct file *file, char __user *userbuf,
size_t count, loff_t *ppos)
@@ -101,40 +96,6 @@ static ssize_t sta_num_ps_buf_frames_read(struct file *file,
}
STA_OPS(num_ps_buf_frames);
-static ssize_t sta_inactive_ms_read(struct file *file, char __user *userbuf,
- size_t count, loff_t *ppos)
-{
- struct sta_info *sta = file->private_data;
- return mac80211_format_buffer(userbuf, count, ppos, "%d\n",
- jiffies_to_msecs(jiffies - sta->last_rx));
-}
-STA_OPS(inactive_ms);
-
-
-static ssize_t sta_connected_time_read(struct file *file, char __user *userbuf,
- size_t count, loff_t *ppos)
-{
- struct sta_info *sta = file->private_data;
- struct timespec uptime;
- struct tm result;
- long connected_time_secs;
- char buf[100];
- int res;
- ktime_get_ts(&uptime);
- connected_time_secs = uptime.tv_sec - sta->last_connected;
- time_to_tm(connected_time_secs, 0, &result);
- result.tm_year -= 70;
- result.tm_mday -= 1;
- res = scnprintf(buf, sizeof(buf),
- "years - %ld\nmonths - %d\ndays - %d\nclock - %d:%d:%d\n\n",
- result.tm_year, result.tm_mon, result.tm_mday,
- result.tm_hour, result.tm_min, result.tm_sec);
- return simple_read_from_buffer(userbuf, count, ppos, buf, res);
-}
-STA_OPS(connected_time);
-
-
-
static ssize_t sta_last_seq_ctrl_read(struct file *file, char __user *userbuf,
size_t count, loff_t *ppos)
{
@@ -359,37 +320,6 @@ static ssize_t sta_vht_capa_read(struct file *file, char __user *userbuf,
}
STA_OPS(vht_capa);
-static ssize_t sta_current_tx_rate_read(struct file *file, char __user *userbuf,
- size_t count, loff_t *ppos)
-{
- struct sta_info *sta = file->private_data;
- struct rate_info rinfo;
- u16 rate;
- sta_set_rate_info_tx(sta, &sta->last_tx_rate, &rinfo);
- rate = cfg80211_calculate_bitrate(&rinfo);
-
- return mac80211_format_buffer(userbuf, count, ppos,
- "%d.%d MBit/s\n",
- rate/10, rate%10);
-}
-STA_OPS(current_tx_rate);
-
-static ssize_t sta_last_rx_rate_read(struct file *file, char __user *userbuf,
- size_t count, loff_t *ppos)
-{
- struct sta_info *sta = file->private_data;
- struct rate_info rinfo;
- u16 rate;
-
- sta_set_rate_info_rx(sta, &rinfo);
-
- rate = cfg80211_calculate_bitrate(&rinfo);
-
- return mac80211_format_buffer(userbuf, count, ppos,
- "%d.%d MBit/s\n",
- rate/10, rate%10);
-}
-STA_OPS(last_rx_rate);
#define DEBUGFS_ADD(name) \
debugfs_create_file(#name, 0400, \
@@ -432,30 +362,15 @@ void ieee80211_sta_debugfs_add(struct sta_info *sta)
DEBUGFS_ADD(flags);
DEBUGFS_ADD(num_ps_buf_frames);
- DEBUGFS_ADD(inactive_ms);
- DEBUGFS_ADD(connected_time);
DEBUGFS_ADD(last_seq_ctrl);
DEBUGFS_ADD(agg_status);
- DEBUGFS_ADD(dev);
- DEBUGFS_ADD(last_signal);
- DEBUGFS_ADD(beacon_loss_count);
DEBUGFS_ADD(ht_capa);
DEBUGFS_ADD(vht_capa);
DEBUGFS_ADD(last_ack_signal);
- DEBUGFS_ADD(current_tx_rate);
- DEBUGFS_ADD(last_rx_rate);
- DEBUGFS_ADD_COUNTER(rx_packets, rx_packets);
- DEBUGFS_ADD_COUNTER(tx_packets, tx_packets);
- DEBUGFS_ADD_COUNTER(rx_bytes, rx_bytes);
- DEBUGFS_ADD_COUNTER(tx_bytes, tx_bytes);
DEBUGFS_ADD_COUNTER(rx_duplicates, num_duplicates);
DEBUGFS_ADD_COUNTER(rx_fragments, rx_fragments);
- DEBUGFS_ADD_COUNTER(rx_dropped, rx_dropped);
- DEBUGFS_ADD_COUNTER(tx_fragments, tx_fragments);
DEBUGFS_ADD_COUNTER(tx_filtered, tx_filtered_count);
- DEBUGFS_ADD_COUNTER(tx_retry_failed, tx_retry_failed);
- DEBUGFS_ADD_COUNTER(tx_retry_count, tx_retry_count);
if (sizeof(sta->driver_buffered_tids) == sizeof(u32))
debugfs_create_x32("driver_buffered_tids", 0400,
diff --git a/net/mac80211/driver-ops.h b/net/mac80211/driver-ops.h
index 26e1ca8a474a..c01e681b90fb 100644
--- a/net/mac80211/driver-ops.h
+++ b/net/mac80211/driver-ops.h
@@ -417,12 +417,13 @@ static inline int drv_get_stats(struct ieee80211_local *local,
return ret;
}
-static inline void drv_get_tkip_seq(struct ieee80211_local *local,
- u8 hw_key_idx, u32 *iv32, u16 *iv16)
+static inline void drv_get_key_seq(struct ieee80211_local *local,
+ struct ieee80211_key *key,
+ struct ieee80211_key_seq *seq)
{
- if (local->ops->get_tkip_seq)
- local->ops->get_tkip_seq(&local->hw, hw_key_idx, iv32, iv16);
- trace_drv_get_tkip_seq(local, hw_key_idx, iv32, iv16);
+ if (local->ops->get_key_seq)
+ local->ops->get_key_seq(&local->hw, &key->conf, seq);
+ trace_drv_get_key_seq(local, &key->conf);
}
static inline int drv_set_frag_threshold(struct ieee80211_local *local,
diff --git a/net/mac80211/ethtool.c b/net/mac80211/ethtool.c
index 52bcea6ad9e8..188faab11c24 100644
--- a/net/mac80211/ethtool.c
+++ b/net/mac80211/ethtool.c
@@ -38,7 +38,7 @@ static void ieee80211_get_ringparam(struct net_device *dev,
static const char ieee80211_gstrings_sta_stats[][ETH_GSTRING_LEN] = {
"rx_packets", "rx_bytes",
"rx_duplicates", "rx_fragments", "rx_dropped",
- "tx_packets", "tx_bytes", "tx_fragments",
+ "tx_packets", "tx_bytes",
"tx_filtered", "tx_retry_failed", "tx_retries",
"beacon_loss", "sta_state", "txrate", "rxrate", "signal",
"channel", "noise", "ch_time", "ch_time_busy",
@@ -87,7 +87,6 @@ static void ieee80211_get_stats(struct net_device *dev,
\
data[i++] += sinfo.tx_packets; \
data[i++] += sinfo.tx_bytes; \
- data[i++] += sta->tx_fragments; \
data[i++] += sta->tx_filtered_count; \
data[i++] += sta->tx_retry_failed; \
data[i++] += sta->tx_retry_count; \
diff --git a/net/mac80211/ibss.c b/net/mac80211/ibss.c
index bfef1b215050..21716af8bec3 100644
--- a/net/mac80211/ibss.c
+++ b/net/mac80211/ibss.c
@@ -1031,8 +1031,11 @@ static void ieee80211_update_sta_info(struct ieee80211_sub_if_data *sdata,
}
}
- if (sta && elems->wmm_info && local->hw.queues >= IEEE80211_NUM_ACS)
+ if (sta && !sta->sta.wme &&
+ elems->wmm_info && local->hw.queues >= IEEE80211_NUM_ACS) {
sta->sta.wme = true;
+ ieee80211_check_fast_xmit(sta);
+ }
if (sta && elems->ht_operation && elems->ht_cap_elem &&
sdata->u.ibss.chandef.width != NL80211_CHAN_WIDTH_20_NOHT &&
diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h
index ab46ab4a7249..2c4fe45ea38a 100644
--- a/net/mac80211/ieee80211_i.h
+++ b/net/mac80211/ieee80211_i.h
@@ -181,8 +181,6 @@ typedef unsigned __bitwise__ ieee80211_rx_result;
/**
* enum ieee80211_packet_rx_flags - packet RX flags
- * @IEEE80211_RX_RA_MATCH: frame is destined to interface currently processed
- * (incl. multicast frames)
* @IEEE80211_RX_FRAGMENTED: fragmented frame
* @IEEE80211_RX_AMSDU: a-MSDU packet
* @IEEE80211_RX_MALFORMED_ACTION_FRM: action frame is malformed
@@ -192,7 +190,6 @@ typedef unsigned __bitwise__ ieee80211_rx_result;
* @rx_flags field of &struct ieee80211_rx_status.
*/
enum ieee80211_packet_rx_flags {
- IEEE80211_RX_RA_MATCH = BIT(1),
IEEE80211_RX_FRAGMENTED = BIT(2),
IEEE80211_RX_AMSDU = BIT(3),
IEEE80211_RX_MALFORMED_ACTION_FRM = BIT(4),
@@ -725,7 +722,6 @@ struct ieee80211_if_mesh {
* enum ieee80211_sub_if_data_flags - virtual interface flags
*
* @IEEE80211_SDATA_ALLMULTI: interface wants all multicast packets
- * @IEEE80211_SDATA_PROMISC: interface is promisc
* @IEEE80211_SDATA_OPERATING_GMODE: operating in G-only mode
* @IEEE80211_SDATA_DONT_BRIDGE_PACKETS: bridge packets between
* associated stations and deliver multicast frames both
@@ -735,7 +731,6 @@ struct ieee80211_if_mesh {
*/
enum ieee80211_sub_if_data_flags {
IEEE80211_SDATA_ALLMULTI = BIT(0),
- IEEE80211_SDATA_PROMISC = BIT(1),
IEEE80211_SDATA_OPERATING_GMODE = BIT(2),
IEEE80211_SDATA_DONT_BRIDGE_PACKETS = BIT(3),
IEEE80211_SDATA_DISCONNECT_RESUME = BIT(4),
@@ -1043,7 +1038,6 @@ enum queue_stop_reason {
#ifdef CONFIG_MAC80211_LEDS
struct tpt_led_trigger {
- struct led_trigger trig;
char name[32];
const struct ieee80211_tpt_blink *blink_table;
unsigned int blink_table_len;
@@ -1211,8 +1205,8 @@ struct ieee80211_local {
atomic_t agg_queue_stop[IEEE80211_MAX_QUEUES];
- /* number of interfaces with corresponding IFF_ flags */
- atomic_t iff_allmultis, iff_promiscs;
+ /* number of interfaces with allmulti RX */
+ atomic_t iff_allmultis;
struct rate_control_ref *rate_ctrl;
@@ -1264,6 +1258,15 @@ struct ieee80211_local {
struct list_head chanctx_list;
struct mutex chanctx_mtx;
+#ifdef CONFIG_MAC80211_LEDS
+ struct led_trigger tx_led, rx_led, assoc_led, radio_led;
+ struct led_trigger tpt_led;
+ atomic_t tx_led_active, rx_led_active, assoc_led_active;
+ atomic_t radio_led_active, tpt_led_active;
+ struct tpt_led_trigger *tpt_led_trigger;
+#endif
+
+#ifdef CONFIG_MAC80211_DEBUG_COUNTERS
/* SNMP counters */
/* dot11CountersTable */
u32 dot11TransmittedFragmentCount;
@@ -1276,18 +1279,9 @@ struct ieee80211_local {
u32 dot11MulticastReceivedFrameCount;
u32 dot11TransmittedFrameCount;
-#ifdef CONFIG_MAC80211_LEDS
- struct led_trigger *tx_led, *rx_led, *assoc_led, *radio_led;
- struct tpt_led_trigger *tpt_led_trigger;
- char tx_led_name[32], rx_led_name[32],
- assoc_led_name[32], radio_led_name[32];
-#endif
-
-#ifdef CONFIG_MAC80211_DEBUG_COUNTERS
/* TX/RX handler statistics */
unsigned int tx_handlers_drop;
unsigned int tx_handlers_queued;
- unsigned int tx_handlers_drop_fragment;
unsigned int tx_handlers_drop_wep;
unsigned int tx_handlers_drop_not_assoc;
unsigned int tx_handlers_drop_unauth_port;
@@ -1298,8 +1292,7 @@ struct ieee80211_local {
unsigned int rx_handlers_drop_short;
unsigned int tx_expand_skb_head;
unsigned int tx_expand_skb_head_cloned;
- unsigned int rx_expand_skb_head;
- unsigned int rx_expand_skb_head2;
+ unsigned int rx_expand_skb_head_defrag;
unsigned int rx_handlers_fragments;
unsigned int tx_status_drop;
#define I802_DEBUG_INC(c) (c)++
@@ -1651,6 +1644,11 @@ struct sk_buff *
ieee80211_build_data_template(struct ieee80211_sub_if_data *sdata,
struct sk_buff *skb, u32 info_flags);
+void ieee80211_check_fast_xmit(struct sta_info *sta);
+void ieee80211_check_fast_xmit_all(struct ieee80211_local *local);
+void ieee80211_check_fast_xmit_iface(struct ieee80211_sub_if_data *sdata);
+void ieee80211_clear_fast_xmit(struct sta_info *sta);
+
/* HT */
void ieee80211_apply_htcap_overrides(struct ieee80211_sub_if_data *sdata,
struct ieee80211_sta_ht_cap *ht_cap);
diff --git a/net/mac80211/iface.c b/net/mac80211/iface.c
index b4ac596a7cb7..4ee8fea263ed 100644
--- a/net/mac80211/iface.c
+++ b/net/mac80211/iface.c
@@ -697,9 +697,6 @@ int ieee80211_do_open(struct wireless_dev *wdev, bool coming_up)
if (sdata->flags & IEEE80211_SDATA_ALLMULTI)
atomic_inc(&local->iff_allmultis);
- if (sdata->flags & IEEE80211_SDATA_PROMISC)
- atomic_inc(&local->iff_promiscs);
-
if (coming_up)
local->open_count++;
@@ -819,21 +816,20 @@ static void ieee80211_do_stop(struct ieee80211_sub_if_data *sdata,
* (because if we remove a STA after ops->remove_interface()
* the driver will have removed the vif info already!)
*
- * This is relevant only in WDS mode, in all other modes we've
- * already removed all stations when disconnecting or similar,
- * so warn otherwise.
+ * In WDS mode a station must exist here and be flushed, for
+ * AP_VLANs stations may exist since there's nothing else that
+ * would have removed them, but in other modes there shouldn't
+ * be any stations.
*/
flushed = sta_info_flush(sdata);
- WARN_ON_ONCE((sdata->vif.type != NL80211_IFTYPE_WDS && flushed > 0) ||
- (sdata->vif.type == NL80211_IFTYPE_WDS && flushed != 1));
+ WARN_ON_ONCE(sdata->vif.type != NL80211_IFTYPE_AP_VLAN &&
+ ((sdata->vif.type != NL80211_IFTYPE_WDS && flushed > 0) ||
+ (sdata->vif.type == NL80211_IFTYPE_WDS && flushed != 1)));
- /* don't count this interface for promisc/allmulti while it is down */
+ /* don't count this interface for allmulti while it is down */
if (sdata->flags & IEEE80211_SDATA_ALLMULTI)
atomic_dec(&local->iff_allmultis);
- if (sdata->flags & IEEE80211_SDATA_PROMISC)
- atomic_dec(&local->iff_promiscs);
-
if (sdata->vif.type == NL80211_IFTYPE_AP) {
local->fif_pspoll--;
local->fif_probe_req--;
@@ -1047,12 +1043,10 @@ static void ieee80211_set_multicast_list(struct net_device *dev)
{
struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev);
struct ieee80211_local *local = sdata->local;
- int allmulti, promisc, sdata_allmulti, sdata_promisc;
+ int allmulti, sdata_allmulti;
allmulti = !!(dev->flags & IFF_ALLMULTI);
- promisc = !!(dev->flags & IFF_PROMISC);
sdata_allmulti = !!(sdata->flags & IEEE80211_SDATA_ALLMULTI);
- sdata_promisc = !!(sdata->flags & IEEE80211_SDATA_PROMISC);
if (allmulti != sdata_allmulti) {
if (dev->flags & IFF_ALLMULTI)
@@ -1062,13 +1056,6 @@ static void ieee80211_set_multicast_list(struct net_device *dev)
sdata->flags ^= IEEE80211_SDATA_ALLMULTI;
}
- if (promisc != sdata_promisc) {
- if (dev->flags & IFF_PROMISC)
- atomic_inc(&local->iff_promiscs);
- else
- atomic_dec(&local->iff_promiscs);
- sdata->flags ^= IEEE80211_SDATA_PROMISC;
- }
spin_lock_bh(&local->filter_lock);
__hw_addr_sync(&local->mc_list, &dev->mc, dev->addr_len);
spin_unlock_bh(&local->filter_lock);
@@ -1109,6 +1096,35 @@ static u16 ieee80211_netdev_select_queue(struct net_device *dev,
return ieee80211_select_queue(IEEE80211_DEV_TO_SUB_IF(dev), skb);
}
+static struct rtnl_link_stats64 *
+ieee80211_get_stats64(struct net_device *dev, struct rtnl_link_stats64 *stats)
+{
+ int i;
+
+ for_each_possible_cpu(i) {
+ const struct pcpu_sw_netstats *tstats;
+ u64 rx_packets, rx_bytes, tx_packets, tx_bytes;
+ unsigned int start;
+
+ tstats = per_cpu_ptr(dev->tstats, i);
+
+ do {
+ start = u64_stats_fetch_begin_irq(&tstats->syncp);
+ rx_packets = tstats->rx_packets;
+ tx_packets = tstats->tx_packets;
+ rx_bytes = tstats->rx_bytes;
+ tx_bytes = tstats->tx_bytes;
+ } while (u64_stats_fetch_retry_irq(&tstats->syncp, start));
+
+ stats->rx_packets += rx_packets;
+ stats->tx_packets += tx_packets;
+ stats->rx_bytes += rx_bytes;
+ stats->tx_bytes += tx_bytes;
+ }
+
+ return stats;
+}
+
static const struct net_device_ops ieee80211_dataif_ops = {
.ndo_open = ieee80211_open,
.ndo_stop = ieee80211_stop,
@@ -1118,6 +1134,7 @@ static const struct net_device_ops ieee80211_dataif_ops = {
.ndo_change_mtu = ieee80211_change_mtu,
.ndo_set_mac_address = ieee80211_change_mac,
.ndo_select_queue = ieee80211_netdev_select_queue,
+ .ndo_get_stats64 = ieee80211_get_stats64,
};
static u16 ieee80211_monitor_select_queue(struct net_device *dev,
@@ -1151,14 +1168,21 @@ static const struct net_device_ops ieee80211_monitorif_ops = {
.ndo_change_mtu = ieee80211_change_mtu,
.ndo_set_mac_address = ieee80211_change_mac,
.ndo_select_queue = ieee80211_monitor_select_queue,
+ .ndo_get_stats64 = ieee80211_get_stats64,
};
+static void ieee80211_if_free(struct net_device *dev)
+{
+ free_percpu(dev->tstats);
+ free_netdev(dev);
+}
+
static void ieee80211_if_setup(struct net_device *dev)
{
ether_setup(dev);
dev->priv_flags &= ~IFF_TX_SKB_SHARING;
dev->netdev_ops = &ieee80211_dataif_ops;
- dev->destructor = free_netdev;
+ dev->destructor = ieee80211_if_free;
}
static void ieee80211_iface_work(struct work_struct *work)
@@ -1699,6 +1723,12 @@ int ieee80211_if_add(struct ieee80211_local *local, const char *name,
return -ENOMEM;
dev_net_set(ndev, wiphy_net(local->hw.wiphy));
+ ndev->tstats = netdev_alloc_pcpu_stats(struct pcpu_sw_netstats);
+ if (!ndev->tstats) {
+ free_netdev(ndev);
+ return -ENOMEM;
+ }
+
ndev->needed_headroom = local->tx_headroom +
4*6 /* four MAC addresses */
+ 2 + 2 + 2 + 2 /* ctl, dur, seq, qos */
diff --git a/net/mac80211/key.c b/net/mac80211/key.c
index 2291cd730091..2e677376c958 100644
--- a/net/mac80211/key.c
+++ b/net/mac80211/key.c
@@ -229,6 +229,7 @@ static void __ieee80211_set_default_key(struct ieee80211_sub_if_data *sdata,
if (uni) {
rcu_assign_pointer(sdata->default_unicast_key, key);
+ ieee80211_check_fast_xmit_iface(sdata);
drv_set_default_unicast_key(sdata->local, sdata, idx);
}
@@ -298,6 +299,7 @@ static void ieee80211_key_replace(struct ieee80211_sub_if_data *sdata,
if (pairwise) {
rcu_assign_pointer(sta->ptk[idx], new);
sta->ptk_idx = idx;
+ ieee80211_check_fast_xmit(sta);
} else {
rcu_assign_pointer(sta->gtk[idx], new);
sta->gtk_idx = idx;
@@ -483,15 +485,17 @@ ieee80211_key_alloc(u32 cipher, int idx, size_t key_len,
break;
default:
if (cs) {
- size_t len = (seq_len > MAX_PN_LEN) ?
- MAX_PN_LEN : seq_len;
+ if (seq_len && seq_len != cs->pn_len) {
+ kfree(key);
+ return ERR_PTR(-EINVAL);
+ }
key->conf.iv_len = cs->hdr_len;
key->conf.icv_len = cs->mic_len;
for (i = 0; i < IEEE80211_NUM_TIDS + 1; i++)
- for (j = 0; j < len; j++)
+ for (j = 0; j < seq_len; j++)
key->u.gen.rx_pn[i][j] =
- seq[len - j - 1];
+ seq[seq_len - j - 1];
key->flags |= KEY_FLAG_CIPHER_SCHEME;
}
}
diff --git a/net/mac80211/key.h b/net/mac80211/key.h
index c5a31835be0e..df430a618764 100644
--- a/net/mac80211/key.h
+++ b/net/mac80211/key.h
@@ -18,7 +18,6 @@
#define NUM_DEFAULT_KEYS 4
#define NUM_DEFAULT_MGMT_KEYS 2
-#define MAX_PN_LEN 16
struct ieee80211_local;
struct ieee80211_sub_if_data;
@@ -116,7 +115,7 @@ struct ieee80211_key {
} gcmp;
struct {
/* generic cipher scheme */
- u8 rx_pn[IEEE80211_NUM_TIDS + 1][MAX_PN_LEN];
+ u8 rx_pn[IEEE80211_NUM_TIDS + 1][IEEE80211_MAX_PN_LEN];
} gen;
} u;
diff --git a/net/mac80211/led.c b/net/mac80211/led.c
index e2b836446af3..0505845b7ab8 100644
--- a/net/mac80211/led.c
+++ b/net/mac80211/led.c
@@ -12,96 +12,175 @@
#include <linux/export.h>
#include "led.h"
-#define MAC80211_BLINK_DELAY 50 /* ms */
-
-void ieee80211_led_rx(struct ieee80211_local *local)
-{
- unsigned long led_delay = MAC80211_BLINK_DELAY;
- if (unlikely(!local->rx_led))
- return;
- led_trigger_blink_oneshot(local->rx_led, &led_delay, &led_delay, 0);
-}
-
-void ieee80211_led_tx(struct ieee80211_local *local)
-{
- unsigned long led_delay = MAC80211_BLINK_DELAY;
- if (unlikely(!local->tx_led))
- return;
- led_trigger_blink_oneshot(local->tx_led, &led_delay, &led_delay, 0);
-}
-
void ieee80211_led_assoc(struct ieee80211_local *local, bool associated)
{
- if (unlikely(!local->assoc_led))
+ if (!atomic_read(&local->assoc_led_active))
return;
if (associated)
- led_trigger_event(local->assoc_led, LED_FULL);
+ led_trigger_event(&local->assoc_led, LED_FULL);
else
- led_trigger_event(local->assoc_led, LED_OFF);
+ led_trigger_event(&local->assoc_led, LED_OFF);
}
void ieee80211_led_radio(struct ieee80211_local *local, bool enabled)
{
- if (unlikely(!local->radio_led))
+ if (!atomic_read(&local->radio_led_active))
return;
if (enabled)
- led_trigger_event(local->radio_led, LED_FULL);
+ led_trigger_event(&local->radio_led, LED_FULL);
else
- led_trigger_event(local->radio_led, LED_OFF);
+ led_trigger_event(&local->radio_led, LED_OFF);
+}
+
+void ieee80211_alloc_led_names(struct ieee80211_local *local)
+{
+ local->rx_led.name = kasprintf(GFP_KERNEL, "%srx",
+ wiphy_name(local->hw.wiphy));
+ local->tx_led.name = kasprintf(GFP_KERNEL, "%stx",
+ wiphy_name(local->hw.wiphy));
+ local->assoc_led.name = kasprintf(GFP_KERNEL, "%sassoc",
+ wiphy_name(local->hw.wiphy));
+ local->radio_led.name = kasprintf(GFP_KERNEL, "%sradio",
+ wiphy_name(local->hw.wiphy));
+}
+
+void ieee80211_free_led_names(struct ieee80211_local *local)
+{
+ kfree(local->rx_led.name);
+ kfree(local->tx_led.name);
+ kfree(local->assoc_led.name);
+ kfree(local->radio_led.name);
+}
+
+static void ieee80211_tx_led_activate(struct led_classdev *led_cdev)
+{
+ struct ieee80211_local *local = container_of(led_cdev->trigger,
+ struct ieee80211_local,
+ tx_led);
+
+ atomic_inc(&local->tx_led_active);
+}
+
+static void ieee80211_tx_led_deactivate(struct led_classdev *led_cdev)
+{
+ struct ieee80211_local *local = container_of(led_cdev->trigger,
+ struct ieee80211_local,
+ tx_led);
+
+ atomic_dec(&local->tx_led_active);
+}
+
+static void ieee80211_rx_led_activate(struct led_classdev *led_cdev)
+{
+ struct ieee80211_local *local = container_of(led_cdev->trigger,
+ struct ieee80211_local,
+ rx_led);
+
+ atomic_inc(&local->rx_led_active);
+}
+
+static void ieee80211_rx_led_deactivate(struct led_classdev *led_cdev)
+{
+ struct ieee80211_local *local = container_of(led_cdev->trigger,
+ struct ieee80211_local,
+ rx_led);
+
+ atomic_dec(&local->rx_led_active);
+}
+
+static void ieee80211_assoc_led_activate(struct led_classdev *led_cdev)
+{
+ struct ieee80211_local *local = container_of(led_cdev->trigger,
+ struct ieee80211_local,
+ assoc_led);
+
+ atomic_inc(&local->assoc_led_active);
+}
+
+static void ieee80211_assoc_led_deactivate(struct led_classdev *led_cdev)
+{
+ struct ieee80211_local *local = container_of(led_cdev->trigger,
+ struct ieee80211_local,
+ assoc_led);
+
+ atomic_dec(&local->assoc_led_active);
+}
+
+static void ieee80211_radio_led_activate(struct led_classdev *led_cdev)
+{
+ struct ieee80211_local *local = container_of(led_cdev->trigger,
+ struct ieee80211_local,
+ radio_led);
+
+ atomic_inc(&local->radio_led_active);
+}
+
+static void ieee80211_radio_led_deactivate(struct led_classdev *led_cdev)
+{
+ struct ieee80211_local *local = container_of(led_cdev->trigger,
+ struct ieee80211_local,
+ radio_led);
+
+ atomic_dec(&local->radio_led_active);
+}
+
+static void ieee80211_tpt_led_activate(struct led_classdev *led_cdev)
+{
+ struct ieee80211_local *local = container_of(led_cdev->trigger,
+ struct ieee80211_local,
+ tpt_led);
+
+ atomic_inc(&local->tpt_led_active);
}
-void ieee80211_led_names(struct ieee80211_local *local)
+static void ieee80211_tpt_led_deactivate(struct led_classdev *led_cdev)
{
- snprintf(local->rx_led_name, sizeof(local->rx_led_name),
- "%srx", wiphy_name(local->hw.wiphy));
- snprintf(local->tx_led_name, sizeof(local->tx_led_name),
- "%stx", wiphy_name(local->hw.wiphy));
- snprintf(local->assoc_led_name, sizeof(local->assoc_led_name),
- "%sassoc", wiphy_name(local->hw.wiphy));
- snprintf(local->radio_led_name, sizeof(local->radio_led_name),
- "%sradio", wiphy_name(local->hw.wiphy));
+ struct ieee80211_local *local = container_of(led_cdev->trigger,
+ struct ieee80211_local,
+ tpt_led);
+
+ atomic_dec(&local->tpt_led_active);
}
void ieee80211_led_init(struct ieee80211_local *local)
{
- local->rx_led = kzalloc(sizeof(struct led_trigger), GFP_KERNEL);
- if (local->rx_led) {
- local->rx_led->name = local->rx_led_name;
- if (led_trigger_register(local->rx_led)) {
- kfree(local->rx_led);
- local->rx_led = NULL;
- }
+ atomic_set(&local->rx_led_active, 0);
+ local->rx_led.activate = ieee80211_rx_led_activate;
+ local->rx_led.deactivate = ieee80211_rx_led_deactivate;
+ if (local->rx_led.name && led_trigger_register(&local->rx_led)) {
+ kfree(local->rx_led.name);
+ local->rx_led.name = NULL;
}
- local->tx_led = kzalloc(sizeof(struct led_trigger), GFP_KERNEL);
- if (local->tx_led) {
- local->tx_led->name = local->tx_led_name;
- if (led_trigger_register(local->tx_led)) {
- kfree(local->tx_led);
- local->tx_led = NULL;
- }
+ atomic_set(&local->tx_led_active, 0);
+ local->tx_led.activate = ieee80211_tx_led_activate;
+ local->tx_led.deactivate = ieee80211_tx_led_deactivate;
+ if (local->tx_led.name && led_trigger_register(&local->tx_led)) {
+ kfree(local->tx_led.name);
+ local->tx_led.name = NULL;
}
- local->assoc_led = kzalloc(sizeof(struct led_trigger), GFP_KERNEL);
- if (local->assoc_led) {
- local->assoc_led->name = local->assoc_led_name;
- if (led_trigger_register(local->assoc_led)) {
- kfree(local->assoc_led);
- local->assoc_led = NULL;
- }
+ atomic_set(&local->assoc_led_active, 0);
+ local->assoc_led.activate = ieee80211_assoc_led_activate;
+ local->assoc_led.deactivate = ieee80211_assoc_led_deactivate;
+ if (local->assoc_led.name && led_trigger_register(&local->assoc_led)) {
+ kfree(local->assoc_led.name);
+ local->assoc_led.name = NULL;
}
- local->radio_led = kzalloc(sizeof(struct led_trigger), GFP_KERNEL);
- if (local->radio_led) {
- local->radio_led->name = local->radio_led_name;
- if (led_trigger_register(local->radio_led)) {
- kfree(local->radio_led);
- local->radio_led = NULL;
- }
+ atomic_set(&local->radio_led_active, 0);
+ local->radio_led.activate = ieee80211_radio_led_activate;
+ local->radio_led.deactivate = ieee80211_radio_led_deactivate;
+ if (local->radio_led.name && led_trigger_register(&local->radio_led)) {
+ kfree(local->radio_led.name);
+ local->radio_led.name = NULL;
}
+ atomic_set(&local->tpt_led_active, 0);
if (local->tpt_led_trigger) {
- if (led_trigger_register(&local->tpt_led_trigger->trig)) {
+ local->tpt_led.activate = ieee80211_tpt_led_activate;
+ local->tpt_led.deactivate = ieee80211_tpt_led_deactivate;
+ if (led_trigger_register(&local->tpt_led)) {
kfree(local->tpt_led_trigger);
local->tpt_led_trigger = NULL;
}
@@ -110,58 +189,50 @@ void ieee80211_led_init(struct ieee80211_local *local)
void ieee80211_led_exit(struct ieee80211_local *local)
{
- if (local->radio_led) {
- led_trigger_unregister(local->radio_led);
- kfree(local->radio_led);
- }
- if (local->assoc_led) {
- led_trigger_unregister(local->assoc_led);
- kfree(local->assoc_led);
- }
- if (local->tx_led) {
- led_trigger_unregister(local->tx_led);
- kfree(local->tx_led);
- }
- if (local->rx_led) {
- led_trigger_unregister(local->rx_led);
- kfree(local->rx_led);
- }
+ if (local->radio_led.name)
+ led_trigger_unregister(&local->radio_led);
+ if (local->assoc_led.name)
+ led_trigger_unregister(&local->assoc_led);
+ if (local->tx_led.name)
+ led_trigger_unregister(&local->tx_led);
+ if (local->rx_led.name)
+ led_trigger_unregister(&local->rx_led);
if (local->tpt_led_trigger) {
- led_trigger_unregister(&local->tpt_led_trigger->trig);
+ led_trigger_unregister(&local->tpt_led);
kfree(local->tpt_led_trigger);
}
}
-char *__ieee80211_get_radio_led_name(struct ieee80211_hw *hw)
+const char *__ieee80211_get_radio_led_name(struct ieee80211_hw *hw)
{
struct ieee80211_local *local = hw_to_local(hw);
- return local->radio_led_name;
+ return local->radio_led.name;
}
EXPORT_SYMBOL(__ieee80211_get_radio_led_name);
-char *__ieee80211_get_assoc_led_name(struct ieee80211_hw *hw)
+const char *__ieee80211_get_assoc_led_name(struct ieee80211_hw *hw)
{
struct ieee80211_local *local = hw_to_local(hw);
- return local->assoc_led_name;
+ return local->assoc_led.name;
}
EXPORT_SYMBOL(__ieee80211_get_assoc_led_name);
-char *__ieee80211_get_tx_led_name(struct ieee80211_hw *hw)
+const char *__ieee80211_get_tx_led_name(struct ieee80211_hw *hw)
{
struct ieee80211_local *local = hw_to_local(hw);
- return local->tx_led_name;
+ return local->tx_led.name;
}
EXPORT_SYMBOL(__ieee80211_get_tx_led_name);
-char *__ieee80211_get_rx_led_name(struct ieee80211_hw *hw)
+const char *__ieee80211_get_rx_led_name(struct ieee80211_hw *hw)
{
struct ieee80211_local *local = hw_to_local(hw);
- return local->rx_led_name;
+ return local->rx_led.name;
}
EXPORT_SYMBOL(__ieee80211_get_rx_led_name);
@@ -205,16 +276,17 @@ static void tpt_trig_timer(unsigned long data)
}
}
- read_lock(&tpt_trig->trig.leddev_list_lock);
- list_for_each_entry(led_cdev, &tpt_trig->trig.led_cdevs, trig_list)
+ read_lock(&local->tpt_led.leddev_list_lock);
+ list_for_each_entry(led_cdev, &local->tpt_led.led_cdevs, trig_list)
led_blink_set(led_cdev, &on, &off);
- read_unlock(&tpt_trig->trig.leddev_list_lock);
+ read_unlock(&local->tpt_led.leddev_list_lock);
}
-char *__ieee80211_create_tpt_led_trigger(struct ieee80211_hw *hw,
- unsigned int flags,
- const struct ieee80211_tpt_blink *blink_table,
- unsigned int blink_table_len)
+const char *
+__ieee80211_create_tpt_led_trigger(struct ieee80211_hw *hw,
+ unsigned int flags,
+ const struct ieee80211_tpt_blink *blink_table,
+ unsigned int blink_table_len)
{
struct ieee80211_local *local = hw_to_local(hw);
struct tpt_led_trigger *tpt_trig;
@@ -229,7 +301,7 @@ char *__ieee80211_create_tpt_led_trigger(struct ieee80211_hw *hw,
snprintf(tpt_trig->name, sizeof(tpt_trig->name),
"%stpt", wiphy_name(local->hw.wiphy));
- tpt_trig->trig.name = tpt_trig->name;
+ local->tpt_led.name = tpt_trig->name;
tpt_trig->blink_table = blink_table;
tpt_trig->blink_table_len = blink_table_len;
@@ -269,10 +341,10 @@ static void ieee80211_stop_tpt_led_trig(struct ieee80211_local *local)
tpt_trig->running = false;
del_timer_sync(&tpt_trig->timer);
- read_lock(&tpt_trig->trig.leddev_list_lock);
- list_for_each_entry(led_cdev, &tpt_trig->trig.led_cdevs, trig_list)
+ read_lock(&local->tpt_led.leddev_list_lock);
+ list_for_each_entry(led_cdev, &local->tpt_led.led_cdevs, trig_list)
led_set_brightness(led_cdev, LED_OFF);
- read_unlock(&tpt_trig->trig.leddev_list_lock);
+ read_unlock(&local->tpt_led.leddev_list_lock);
}
void ieee80211_mod_tpt_led_trig(struct ieee80211_local *local,
diff --git a/net/mac80211/led.h b/net/mac80211/led.h
index 89f4344f13b9..a7893a1ac98b 100644
--- a/net/mac80211/led.h
+++ b/net/mac80211/led.h
@@ -11,25 +11,42 @@
#include <linux/leds.h>
#include "ieee80211_i.h"
+#define MAC80211_BLINK_DELAY 50 /* ms */
+
+static inline void ieee80211_led_rx(struct ieee80211_local *local)
+{
+#ifdef CONFIG_MAC80211_LEDS
+ unsigned long led_delay = MAC80211_BLINK_DELAY;
+
+ if (!atomic_read(&local->rx_led_active))
+ return;
+ led_trigger_blink_oneshot(&local->rx_led, &led_delay, &led_delay, 0);
+#endif
+}
+
+static inline void ieee80211_led_tx(struct ieee80211_local *local)
+{
+#ifdef CONFIG_MAC80211_LEDS
+ unsigned long led_delay = MAC80211_BLINK_DELAY;
+
+ if (!atomic_read(&local->tx_led_active))
+ return;
+ led_trigger_blink_oneshot(&local->tx_led, &led_delay, &led_delay, 0);
+#endif
+}
+
#ifdef CONFIG_MAC80211_LEDS
-void ieee80211_led_rx(struct ieee80211_local *local);
-void ieee80211_led_tx(struct ieee80211_local *local);
void ieee80211_led_assoc(struct ieee80211_local *local,
bool associated);
void ieee80211_led_radio(struct ieee80211_local *local,
bool enabled);
-void ieee80211_led_names(struct ieee80211_local *local);
+void ieee80211_alloc_led_names(struct ieee80211_local *local);
+void ieee80211_free_led_names(struct ieee80211_local *local);
void ieee80211_led_init(struct ieee80211_local *local);
void ieee80211_led_exit(struct ieee80211_local *local);
void ieee80211_mod_tpt_led_trig(struct ieee80211_local *local,
unsigned int types_on, unsigned int types_off);
#else
-static inline void ieee80211_led_rx(struct ieee80211_local *local)
-{
-}
-static inline void ieee80211_led_tx(struct ieee80211_local *local)
-{
-}
static inline void ieee80211_led_assoc(struct ieee80211_local *local,
bool associated)
{
@@ -38,7 +55,10 @@ static inline void ieee80211_led_radio(struct ieee80211_local *local,
bool enabled)
{
}
-static inline void ieee80211_led_names(struct ieee80211_local *local)
+static inline void ieee80211_alloc_led_names(struct ieee80211_local *local)
+{
+}
+static inline void ieee80211_free_led_names(struct ieee80211_local *local)
{
}
static inline void ieee80211_led_init(struct ieee80211_local *local)
@@ -58,7 +78,7 @@ static inline void
ieee80211_tpt_led_trig_tx(struct ieee80211_local *local, __le16 fc, int bytes)
{
#ifdef CONFIG_MAC80211_LEDS
- if (local->tpt_led_trigger && ieee80211_is_data(fc))
+ if (ieee80211_is_data(fc) && atomic_read(&local->tpt_led_active))
local->tpt_led_trigger->tx_bytes += bytes;
#endif
}
@@ -67,7 +87,7 @@ static inline void
ieee80211_tpt_led_trig_rx(struct ieee80211_local *local, __le16 fc, int bytes)
{
#ifdef CONFIG_MAC80211_LEDS
- if (local->tpt_led_trigger && ieee80211_is_data(fc))
+ if (ieee80211_is_data(fc) && atomic_read(&local->tpt_led_active))
local->tpt_led_trigger->rx_bytes += bytes;
#endif
}
diff --git a/net/mac80211/main.c b/net/mac80211/main.c
index df3051d96aff..3c956c5f99b2 100644
--- a/net/mac80211/main.c
+++ b/net/mac80211/main.c
@@ -41,9 +41,6 @@ void ieee80211_configure_filter(struct ieee80211_local *local)
unsigned int changed_flags;
unsigned int new_flags = 0;
- if (atomic_read(&local->iff_promiscs))
- new_flags |= FIF_PROMISC_IN_BSS;
-
if (atomic_read(&local->iff_allmultis))
new_flags |= FIF_ALLMULTI;
@@ -646,7 +643,7 @@ struct ieee80211_hw *ieee80211_alloc_hw_nm(size_t priv_data_len,
skb_queue_head_init(&local->skb_queue);
skb_queue_head_init(&local->skb_queue_unreliable);
- ieee80211_led_names(local);
+ ieee80211_alloc_led_names(local);
ieee80211_roc_setup(local);
@@ -771,8 +768,11 @@ static int ieee80211_init_cipher_suites(struct ieee80211_local *local)
suites[w++] = WLAN_CIPHER_SUITE_BIP_GMAC_256;
}
- for (r = 0; r < local->hw.n_cipher_schemes; r++)
+ for (r = 0; r < local->hw.n_cipher_schemes; r++) {
suites[w++] = cs[r].cipher;
+ if (WARN_ON(cs[r].pn_len > IEEE80211_MAX_PN_LEN))
+ return -EINVAL;
+ }
}
local->hw.wiphy->cipher_suites = suites;
@@ -840,7 +840,8 @@ int ieee80211_register_hw(struct ieee80211_hw *hw)
/* Only HW csum features are currently compatible with mac80211 */
feature_whitelist = NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM |
- NETIF_F_HW_CSUM;
+ NETIF_F_HW_CSUM | NETIF_F_SG | NETIF_F_HIGHDMA |
+ NETIF_F_GSO_SOFTWARE;
if (WARN_ON(hw->netdev_features & ~feature_whitelist))
return -EINVAL;
@@ -1209,6 +1210,8 @@ void ieee80211_free_hw(struct ieee80211_hw *hw)
sta_info_stop(local);
+ ieee80211_free_led_names(local);
+
wiphy_free(local->hw.wiphy);
}
EXPORT_SYMBOL(ieee80211_free_hw);
diff --git a/net/mac80211/mesh_plink.c b/net/mac80211/mesh_plink.c
index 60d737f144e3..ac843fc88745 100644
--- a/net/mac80211/mesh_plink.c
+++ b/net/mac80211/mesh_plink.c
@@ -72,10 +72,11 @@ static bool rssi_threshold_check(struct ieee80211_sub_if_data *sdata,
*
* @sta: mesh peer link to restart
*
- * Locking: this function must be called holding sta->lock
+ * Locking: this function must be called holding sta->plink_lock
*/
static inline void mesh_plink_fsm_restart(struct sta_info *sta)
{
+ lockdep_assert_held(&sta->plink_lock);
sta->plink_state = NL80211_PLINK_LISTEN;
sta->llid = sta->plid = sta->reason = 0;
sta->plink_retries = 0;
@@ -213,13 +214,15 @@ static u32 mesh_set_ht_prot_mode(struct ieee80211_sub_if_data *sdata)
* All mesh paths with this peer as next hop will be flushed
* Returns beacon changed flag if the beacon content changed.
*
- * Locking: the caller must hold sta->lock
+ * Locking: the caller must hold sta->plink_lock
*/
static u32 __mesh_plink_deactivate(struct sta_info *sta)
{
struct ieee80211_sub_if_data *sdata = sta->sdata;
u32 changed = 0;
+ lockdep_assert_held(&sta->plink_lock);
+
if (sta->plink_state == NL80211_PLINK_ESTAB)
changed = mesh_plink_dec_estab_count(sdata);
sta->plink_state = NL80211_PLINK_BLOCKED;
@@ -244,13 +247,13 @@ u32 mesh_plink_deactivate(struct sta_info *sta)
struct ieee80211_sub_if_data *sdata = sta->sdata;
u32 changed;
- spin_lock_bh(&sta->lock);
+ spin_lock_bh(&sta->plink_lock);
changed = __mesh_plink_deactivate(sta);
sta->reason = WLAN_REASON_MESH_PEER_CANCELED;
mesh_plink_frame_tx(sdata, WLAN_SP_MESH_PEERING_CLOSE,
sta->sta.addr, sta->llid, sta->plid,
sta->reason);
- spin_unlock_bh(&sta->lock);
+ spin_unlock_bh(&sta->plink_lock);
return changed;
}
@@ -387,7 +390,7 @@ static void mesh_sta_info_init(struct ieee80211_sub_if_data *sdata,
sband = local->hw.wiphy->bands[band];
rates = ieee80211_sta_get_rates(sdata, elems, band, &basic_rates);
- spin_lock_bh(&sta->lock);
+ spin_lock_bh(&sta->plink_lock);
sta->last_rx = jiffies;
/* rates and capabilities don't change during peering */
@@ -419,7 +422,7 @@ static void mesh_sta_info_init(struct ieee80211_sub_if_data *sdata,
else
rate_control_rate_update(local, sband, sta, changed);
out:
- spin_unlock_bh(&sta->lock);
+ spin_unlock_bh(&sta->plink_lock);
}
static struct sta_info *
@@ -552,7 +555,7 @@ static void mesh_plink_timer(unsigned long data)
if (sta->sdata->local->quiescing)
return;
- spin_lock_bh(&sta->lock);
+ spin_lock_bh(&sta->plink_lock);
/* If a timer fires just before a state transition on another CPU,
* we may have already extended the timeout and changed state by the
@@ -563,7 +566,7 @@ static void mesh_plink_timer(unsigned long data)
mpl_dbg(sta->sdata,
"Ignoring timer for %pM in state %s (timer adjusted)",
sta->sta.addr, mplstates[sta->plink_state]);
- spin_unlock_bh(&sta->lock);
+ spin_unlock_bh(&sta->plink_lock);
return;
}
@@ -573,7 +576,7 @@ static void mesh_plink_timer(unsigned long data)
mpl_dbg(sta->sdata,
"Ignoring timer for %pM in state %s (timer deleted)",
sta->sta.addr, mplstates[sta->plink_state]);
- spin_unlock_bh(&sta->lock);
+ spin_unlock_bh(&sta->plink_lock);
return;
}
@@ -619,7 +622,7 @@ static void mesh_plink_timer(unsigned long data)
default:
break;
}
- spin_unlock_bh(&sta->lock);
+ spin_unlock_bh(&sta->plink_lock);
if (action)
mesh_plink_frame_tx(sdata, action, sta->sta.addr,
sta->llid, sta->plid, reason);
@@ -674,16 +677,16 @@ u32 mesh_plink_open(struct sta_info *sta)
if (!test_sta_flag(sta, WLAN_STA_AUTH))
return 0;
- spin_lock_bh(&sta->lock);
+ spin_lock_bh(&sta->plink_lock);
sta->llid = mesh_get_new_llid(sdata);
if (sta->plink_state != NL80211_PLINK_LISTEN &&
sta->plink_state != NL80211_PLINK_BLOCKED) {
- spin_unlock_bh(&sta->lock);
+ spin_unlock_bh(&sta->plink_lock);
return 0;
}
sta->plink_state = NL80211_PLINK_OPN_SNT;
mesh_plink_timer_set(sta, sdata->u.mesh.mshcfg.dot11MeshRetryTimeout);
- spin_unlock_bh(&sta->lock);
+ spin_unlock_bh(&sta->plink_lock);
mpl_dbg(sdata,
"Mesh plink: starting establishment with %pM\n",
sta->sta.addr);
@@ -700,10 +703,10 @@ u32 mesh_plink_block(struct sta_info *sta)
{
u32 changed;
- spin_lock_bh(&sta->lock);
+ spin_lock_bh(&sta->plink_lock);
changed = __mesh_plink_deactivate(sta);
sta->plink_state = NL80211_PLINK_BLOCKED;
- spin_unlock_bh(&sta->lock);
+ spin_unlock_bh(&sta->plink_lock);
return changed;
}
@@ -758,7 +761,7 @@ static u32 mesh_plink_fsm(struct ieee80211_sub_if_data *sdata,
mpl_dbg(sdata, "peer %pM in state %s got event %s\n", sta->sta.addr,
mplstates[sta->plink_state], mplevents[event]);
- spin_lock_bh(&sta->lock);
+ spin_lock_bh(&sta->plink_lock);
switch (sta->plink_state) {
case NL80211_PLINK_LISTEN:
switch (event) {
@@ -872,7 +875,7 @@ static u32 mesh_plink_fsm(struct ieee80211_sub_if_data *sdata,
*/
break;
}
- spin_unlock_bh(&sta->lock);
+ spin_unlock_bh(&sta->plink_lock);
if (action) {
mesh_plink_frame_tx(sdata, action, sta->sta.addr,
sta->llid, sta->plid, sta->reason);
diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c
index 26053bf2faa8..3294666f599c 100644
--- a/net/mac80211/mlme.c
+++ b/net/mac80211/mlme.c
@@ -4307,15 +4307,15 @@ static int ieee80211_prep_channel(struct ieee80211_sub_if_data *sdata,
}
static int ieee80211_prep_connection(struct ieee80211_sub_if_data *sdata,
- struct cfg80211_bss *cbss, bool assoc)
+ struct cfg80211_bss *cbss, bool assoc,
+ bool override)
{
struct ieee80211_local *local = sdata->local;
struct ieee80211_if_managed *ifmgd = &sdata->u.mgd;
struct ieee80211_bss *bss = (void *)cbss->priv;
struct sta_info *new_sta = NULL;
struct ieee80211_supported_band *sband;
- struct ieee80211_sta_ht_cap sta_ht_cap;
- bool have_sta = false, is_override = false;
+ bool have_sta = false;
int err;
sband = local->hw.wiphy->bands[cbss->channel->band];
@@ -4335,14 +4335,7 @@ static int ieee80211_prep_connection(struct ieee80211_sub_if_data *sdata,
return -ENOMEM;
}
- memcpy(&sta_ht_cap, &sband->ht_cap, sizeof(sta_ht_cap));
- ieee80211_apply_htcap_overrides(sdata, &sta_ht_cap);
-
- is_override = (sta_ht_cap.cap & IEEE80211_HT_CAP_SUP_WIDTH_20_40) !=
- (sband->ht_cap.cap &
- IEEE80211_HT_CAP_SUP_WIDTH_20_40);
-
- if (new_sta || is_override) {
+ if (new_sta || override) {
err = ieee80211_prep_channel(sdata, cbss);
if (err) {
if (new_sta)
@@ -4552,7 +4545,7 @@ int ieee80211_mgd_auth(struct ieee80211_sub_if_data *sdata,
sdata_info(sdata, "authenticate with %pM\n", req->bss->bssid);
- err = ieee80211_prep_connection(sdata, req->bss, false);
+ err = ieee80211_prep_connection(sdata, req->bss, false, false);
if (err)
goto err_clear;
@@ -4624,6 +4617,7 @@ int ieee80211_mgd_assoc(struct ieee80211_sub_if_data *sdata,
struct ieee80211_supported_band *sband;
const u8 *ssidie, *ht_ie, *vht_ie;
int i, err;
+ bool override = false;
assoc_data = kzalloc(sizeof(*assoc_data) + req->ie_len, GFP_KERNEL);
if (!assoc_data)
@@ -4728,14 +4722,6 @@ int ieee80211_mgd_assoc(struct ieee80211_sub_if_data *sdata,
}
}
- if (req->flags & ASSOC_REQ_DISABLE_HT) {
- ifmgd->flags |= IEEE80211_STA_DISABLE_HT;
- ifmgd->flags |= IEEE80211_STA_DISABLE_VHT;
- }
-
- if (req->flags & ASSOC_REQ_DISABLE_VHT)
- ifmgd->flags |= IEEE80211_STA_DISABLE_VHT;
-
/* Also disable HT if we don't support it or the AP doesn't use WMM */
sband = local->hw.wiphy->bands[req->bss->channel->band];
if (!sband->ht_cap.ht_supported ||
@@ -4847,7 +4833,36 @@ int ieee80211_mgd_assoc(struct ieee80211_sub_if_data *sdata,
ifmgd->dtim_period = 0;
ifmgd->have_beacon = false;
- err = ieee80211_prep_connection(sdata, req->bss, true);
+ /* override HT/VHT configuration only if the AP and we support it */
+ if (!(ifmgd->flags & IEEE80211_STA_DISABLE_HT)) {
+ struct ieee80211_sta_ht_cap sta_ht_cap;
+
+ if (req->flags & ASSOC_REQ_DISABLE_HT)
+ override = true;
+
+ memcpy(&sta_ht_cap, &sband->ht_cap, sizeof(sta_ht_cap));
+ ieee80211_apply_htcap_overrides(sdata, &sta_ht_cap);
+
+ /* check for 40 MHz disable override */
+ if (!(ifmgd->flags & IEEE80211_STA_DISABLE_40MHZ) &&
+ sband->ht_cap.cap & IEEE80211_HT_CAP_SUP_WIDTH_20_40 &&
+ !(sta_ht_cap.cap & IEEE80211_HT_CAP_SUP_WIDTH_20_40))
+ override = true;
+
+ if (!(ifmgd->flags & IEEE80211_STA_DISABLE_VHT) &&
+ req->flags & ASSOC_REQ_DISABLE_VHT)
+ override = true;
+ }
+
+ if (req->flags & ASSOC_REQ_DISABLE_HT) {
+ ifmgd->flags |= IEEE80211_STA_DISABLE_HT;
+ ifmgd->flags |= IEEE80211_STA_DISABLE_VHT;
+ }
+
+ if (req->flags & ASSOC_REQ_DISABLE_VHT)
+ ifmgd->flags |= IEEE80211_STA_DISABLE_VHT;
+
+ err = ieee80211_prep_connection(sdata, req->bss, true, override);
if (err)
goto err_clear;
diff --git a/net/mac80211/rate.c b/net/mac80211/rate.c
index d53355b011f5..de69adf24f53 100644
--- a/net/mac80211/rate.c
+++ b/net/mac80211/rate.c
@@ -683,7 +683,13 @@ void rate_control_get_rate(struct ieee80211_sub_if_data *sdata,
if (sdata->local->hw.flags & IEEE80211_HW_HAS_RATE_CONTROL)
return;
- ref->ops->get_rate(ref->priv, ista, priv_sta, txrc);
+ if (ista) {
+ spin_lock_bh(&sta->rate_ctrl_lock);
+ ref->ops->get_rate(ref->priv, ista, priv_sta, txrc);
+ spin_unlock_bh(&sta->rate_ctrl_lock);
+ } else {
+ ref->ops->get_rate(ref->priv, NULL, NULL, txrc);
+ }
if (sdata->local->hw.flags & IEEE80211_HW_SUPPORTS_RC_TABLE)
return;
diff --git a/net/mac80211/rate.h b/net/mac80211/rate.h
index 38652f09feaf..25c9be5dd7fd 100644
--- a/net/mac80211/rate.h
+++ b/net/mac80211/rate.h
@@ -42,10 +42,12 @@ static inline void rate_control_tx_status(struct ieee80211_local *local,
if (!ref || !test_sta_flag(sta, WLAN_STA_RATE_CONTROL))
return;
+ spin_lock_bh(&sta->rate_ctrl_lock);
if (ref->ops->tx_status)
ref->ops->tx_status(ref->priv, sband, ista, priv_sta, skb);
else
ref->ops->tx_status_noskb(ref->priv, sband, ista, priv_sta, info);
+ spin_unlock_bh(&sta->rate_ctrl_lock);
}
static inline void
@@ -64,7 +66,9 @@ rate_control_tx_status_noskb(struct ieee80211_local *local,
if (WARN_ON_ONCE(!ref->ops->tx_status_noskb))
return;
+ spin_lock_bh(&sta->rate_ctrl_lock);
ref->ops->tx_status_noskb(ref->priv, sband, ista, priv_sta, info);
+ spin_unlock_bh(&sta->rate_ctrl_lock);
}
static inline void rate_control_rate_init(struct sta_info *sta)
@@ -91,8 +95,10 @@ static inline void rate_control_rate_init(struct sta_info *sta)
sband = local->hw.wiphy->bands[chanctx_conf->def.chan->band];
+ spin_lock_bh(&sta->rate_ctrl_lock);
ref->ops->rate_init(ref->priv, sband, &chanctx_conf->def, ista,
priv_sta);
+ spin_unlock_bh(&sta->rate_ctrl_lock);
rcu_read_unlock();
set_sta_flag(sta, WLAN_STA_RATE_CONTROL);
}
@@ -115,18 +121,20 @@ static inline void rate_control_rate_update(struct ieee80211_local *local,
return;
}
+ spin_lock_bh(&sta->rate_ctrl_lock);
ref->ops->rate_update(ref->priv, sband, &chanctx_conf->def,
ista, priv_sta, changed);
+ spin_unlock_bh(&sta->rate_ctrl_lock);
rcu_read_unlock();
}
drv_sta_rc_update(local, sta->sdata, &sta->sta, changed);
}
static inline void *rate_control_alloc_sta(struct rate_control_ref *ref,
- struct ieee80211_sta *sta,
- gfp_t gfp)
+ struct sta_info *sta, gfp_t gfp)
{
- return ref->ops->alloc_sta(ref->priv, sta, gfp);
+ spin_lock_init(&sta->rate_ctrl_lock);
+ return ref->ops->alloc_sta(ref->priv, &sta->sta, gfp);
}
static inline void rate_control_free_sta(struct sta_info *sta)
diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c
index 260eed45b6d2..aa35977a9c4d 100644
--- a/net/mac80211/rx.c
+++ b/net/mac80211/rx.c
@@ -32,6 +32,16 @@
#include "wme.h"
#include "rate.h"
+static inline void ieee80211_rx_stats(struct net_device *dev, u32 len)
+{
+ struct pcpu_sw_netstats *tstats = this_cpu_ptr(dev->tstats);
+
+ u64_stats_update_begin(&tstats->syncp);
+ tstats->rx_packets++;
+ tstats->rx_bytes += len;
+ u64_stats_update_end(&tstats->syncp);
+}
+
/*
* monitor mode reception
*
@@ -529,8 +539,7 @@ ieee80211_rx_monitor(struct ieee80211_local *local, struct sk_buff *origskb,
}
prev_dev = sdata->dev;
- sdata->dev->stats.rx_packets++;
- sdata->dev->stats.rx_bytes += skb->len;
+ ieee80211_rx_stats(sdata->dev, skb->len);
}
if (prev_dev) {
@@ -981,7 +990,6 @@ static void ieee80211_rx_reorder_ampdu(struct ieee80211_rx_data *rx,
struct sk_buff *skb = rx->skb;
struct ieee80211_local *local = rx->local;
struct ieee80211_hdr *hdr = (struct ieee80211_hdr *) skb->data;
- struct ieee80211_rx_status *status = IEEE80211_SKB_RXCB(skb);
struct sta_info *sta = rx->sta;
struct tid_ampdu_rx *tid_agg_rx;
u16 sc;
@@ -1016,10 +1024,6 @@ static void ieee80211_rx_reorder_ampdu(struct ieee80211_rx_data *rx,
ack_policy != IEEE80211_QOS_CTL_ACK_POLICY_NORMAL)
goto dont_reorder;
- /* not actually part of this BA session */
- if (!(status->rx_flags & IEEE80211_RX_RA_MATCH))
- goto dont_reorder;
-
/* new, potentially un-ordered, ampdu frame - process it */
/* reset session timer */
@@ -1073,10 +1077,8 @@ ieee80211_rx_h_check_dup(struct ieee80211_rx_data *rx)
if (unlikely(ieee80211_has_retry(hdr->frame_control) &&
rx->sta->last_seq_ctrl[rx->seqno_idx] ==
hdr->seq_ctrl)) {
- if (status->rx_flags & IEEE80211_RX_RA_MATCH) {
- rx->local->dot11FrameDuplicateCount++;
- rx->sta->num_duplicates++;
- }
+ I802_DEBUG_INC(rx->local->dot11FrameDuplicateCount);
+ rx->sta->num_duplicates++;
return RX_DROP_UNUSABLE;
} else if (!(status->flag & RX_FLAG_AMSDU_MORE)) {
rx->sta->last_seq_ctrl[rx->seqno_idx] = hdr->seq_ctrl;
@@ -1200,6 +1202,8 @@ static void sta_ps_start(struct sta_info *sta)
ps_dbg(sdata, "STA %pM aid %d enters power save mode\n",
sta->sta.addr, sta->sta.aid);
+ ieee80211_clear_fast_xmit(sta);
+
if (!sta->sta.txq[0])
return;
@@ -1265,7 +1269,7 @@ ieee80211_rx_h_uapsd_and_pspoll(struct ieee80211_rx_data *rx)
struct ieee80211_rx_status *status = IEEE80211_SKB_RXCB(rx->skb);
int tid, ac;
- if (!rx->sta || !(status->rx_flags & IEEE80211_RX_RA_MATCH))
+ if (!rx->sta)
return RX_CONTINUE;
if (sdata->vif.type != NL80211_IFTYPE_AP &&
@@ -1367,11 +1371,7 @@ ieee80211_rx_h_sta_process(struct ieee80211_rx_data *rx)
}
}
} else if (rx->sdata->vif.type == NL80211_IFTYPE_OCB) {
- u8 *bssid = ieee80211_get_bssid(hdr, rx->skb->len,
- NL80211_IFTYPE_OCB);
- /* OCB uses wild-card BSSID */
- if (is_broadcast_ether_addr(bssid))
- sta->last_rx = jiffies;
+ sta->last_rx = jiffies;
} else if (!is_multicast_ether_addr(hdr->addr1)) {
/*
* Mesh beacons will update last_rx when if they are found to
@@ -1386,9 +1386,6 @@ ieee80211_rx_h_sta_process(struct ieee80211_rx_data *rx)
}
}
- if (!(status->rx_flags & IEEE80211_RX_RA_MATCH))
- return RX_CONTINUE;
-
if (rx->sdata->vif.type == NL80211_IFTYPE_STATION)
ieee80211_sta_rx_notify(rx->sdata, hdr);
@@ -1517,13 +1514,6 @@ ieee80211_rx_h_decrypt(struct ieee80211_rx_data *rx)
* possible.
*/
- /*
- * No point in finding a key and decrypting if the frame is neither
- * addressed to us nor a multicast frame.
- */
- if (!(status->rx_flags & IEEE80211_RX_RA_MATCH))
- return RX_CONTINUE;
-
/* start without a key */
rx->key = NULL;
fc = hdr->frame_control;
@@ -1795,7 +1785,7 @@ ieee80211_rx_h_defragment(struct ieee80211_rx_data *rx)
frag = sc & IEEE80211_SCTL_FRAG;
if (is_multicast_ether_addr(hdr->addr1)) {
- rx->local->dot11MulticastReceivedFrameCount++;
+ I802_DEBUG_INC(rx->local->dot11MulticastReceivedFrameCount);
goto out_no_led;
}
@@ -1878,7 +1868,7 @@ ieee80211_rx_h_defragment(struct ieee80211_rx_data *rx)
rx->skb = __skb_dequeue(&entry->skb_list);
if (skb_tailroom(rx->skb) < entry->extra_len) {
- I802_DEBUG_INC(rx->local->rx_expand_skb_head2);
+ I802_DEBUG_INC(rx->local->rx_expand_skb_head_defrag);
if (unlikely(pskb_expand_head(rx->skb, 0, entry->extra_len,
GFP_ATOMIC))) {
I802_DEBUG_INC(rx->local->rx_handlers_drop_defrag);
@@ -2054,18 +2044,15 @@ ieee80211_deliver_skb(struct ieee80211_rx_data *rx)
struct sk_buff *skb, *xmit_skb;
struct ethhdr *ehdr = (struct ethhdr *) rx->skb->data;
struct sta_info *dsta;
- struct ieee80211_rx_status *status = IEEE80211_SKB_RXCB(rx->skb);
-
- dev->stats.rx_packets++;
- dev->stats.rx_bytes += rx->skb->len;
skb = rx->skb;
xmit_skb = NULL;
+ ieee80211_rx_stats(dev, skb->len);
+
if ((sdata->vif.type == NL80211_IFTYPE_AP ||
sdata->vif.type == NL80211_IFTYPE_AP_VLAN) &&
!(sdata->flags & IEEE80211_SDATA_DONT_BRIDGE_PACKETS) &&
- (status->rx_flags & IEEE80211_RX_RA_MATCH) &&
(sdata->vif.type != NL80211_IFTYPE_AP_VLAN || !sdata->u.vlan.sta)) {
if (is_multicast_ether_addr(ehdr->h_dest)) {
/*
@@ -2206,7 +2193,6 @@ ieee80211_rx_h_mesh_fwding(struct ieee80211_rx_data *rx)
struct sk_buff *skb = rx->skb, *fwd_skb;
struct ieee80211_local *local = rx->local;
struct ieee80211_sub_if_data *sdata = rx->sdata;
- struct ieee80211_rx_status *status = IEEE80211_SKB_RXCB(skb);
struct ieee80211_if_mesh *ifmsh = &sdata->u.mesh;
u16 q, hdrlen;
@@ -2237,8 +2223,7 @@ ieee80211_rx_h_mesh_fwding(struct ieee80211_rx_data *rx)
mesh_rmc_check(rx->sdata, hdr->addr3, mesh_hdr))
return RX_DROP_MONITOR;
- if (!ieee80211_is_data(hdr->frame_control) ||
- !(status->rx_flags & IEEE80211_RX_RA_MATCH))
+ if (!ieee80211_is_data(hdr->frame_control))
return RX_CONTINUE;
if (!mesh_hdr->ttl)
@@ -2329,11 +2314,9 @@ ieee80211_rx_h_mesh_fwding(struct ieee80211_rx_data *rx)
IEEE80211_IFSTA_MESH_CTR_INC(ifmsh, fwded_frames);
ieee80211_add_pending_skb(local, fwd_skb);
out:
- if (is_multicast_ether_addr(hdr->addr1) ||
- sdata->dev->flags & IFF_PROMISC)
+ if (is_multicast_ether_addr(hdr->addr1))
return RX_CONTINUE;
- else
- return RX_DROP_MONITOR;
+ return RX_DROP_MONITOR;
}
#endif
@@ -2444,6 +2427,9 @@ ieee80211_rx_h_ctrl(struct ieee80211_rx_data *rx, struct sk_buff_head *frames)
struct {
__le16 control, start_seq_num;
} __packed bar_data;
+ struct ieee80211_event event = {
+ .type = BAR_RX_EVENT,
+ };
if (!rx->sta)
return RX_DROP_MONITOR;
@@ -2459,6 +2445,9 @@ ieee80211_rx_h_ctrl(struct ieee80211_rx_data *rx, struct sk_buff_head *frames)
return RX_DROP_MONITOR;
start_seq_num = le16_to_cpu(bar_data.start_seq_num) >> 4;
+ event.u.ba.tid = tid;
+ event.u.ba.ssn = start_seq_num;
+ event.u.ba.sta = &rx->sta->sta;
/* reset session timer */
if (tid_agg_rx->timeout)
@@ -2471,6 +2460,8 @@ ieee80211_rx_h_ctrl(struct ieee80211_rx_data *rx, struct sk_buff_head *frames)
start_seq_num, frames);
spin_unlock(&tid_agg_rx->reorder_lock);
+ drv_event_callback(rx->local, rx->sdata, &event);
+
kfree_skb(skb);
return RX_QUEUED;
}
@@ -2560,9 +2551,6 @@ ieee80211_rx_h_mgmt_check(struct ieee80211_rx_data *rx)
rx->flags |= IEEE80211_RX_BEACON_REPORTED;
}
- if (!(status->rx_flags & IEEE80211_RX_RA_MATCH))
- return RX_DROP_MONITOR;
-
if (ieee80211_drop_unencrypted_mgmt(rx))
return RX_DROP_UNUSABLE;
@@ -2590,9 +2578,6 @@ ieee80211_rx_h_action(struct ieee80211_rx_data *rx)
mgmt->u.action.category != WLAN_CATEGORY_SPECTRUM_MGMT)
return RX_DROP_UNUSABLE;
- if (!(status->rx_flags & IEEE80211_RX_RA_MATCH))
- return RX_DROP_UNUSABLE;
-
switch (mgmt->u.action.category) {
case WLAN_CATEGORY_HT:
/* reject HT action frames from stations not supporting HT */
@@ -3076,8 +3061,7 @@ static void ieee80211_rx_cooked_monitor(struct ieee80211_rx_data *rx,
}
prev_dev = sdata->dev;
- sdata->dev->stats.rx_packets++;
- sdata->dev->stats.rx_bytes += skb->len;
+ ieee80211_rx_stats(sdata->dev, skb->len);
}
if (prev_dev) {
@@ -3245,16 +3229,25 @@ void ieee80211_release_reorder_timeout(struct sta_info *sta, int tid)
ieee80211_sta_reorder_release(sta->sdata, tid_agg_rx, &frames);
spin_unlock(&tid_agg_rx->reorder_lock);
+ if (!skb_queue_empty(&frames)) {
+ struct ieee80211_event event = {
+ .type = BA_FRAME_TIMEOUT,
+ .u.ba.tid = tid,
+ .u.ba.sta = &sta->sta,
+ };
+ drv_event_callback(rx.local, rx.sdata, &event);
+ }
+
ieee80211_rx_handlers(&rx, &frames);
}
/* main receive path */
-static bool prepare_for_handlers(struct ieee80211_rx_data *rx,
- struct ieee80211_hdr *hdr)
+static bool ieee80211_accept_frame(struct ieee80211_rx_data *rx)
{
struct ieee80211_sub_if_data *sdata = rx->sdata;
struct sk_buff *skb = rx->skb;
+ struct ieee80211_hdr *hdr = (void *)skb->data;
struct ieee80211_rx_status *status = IEEE80211_SKB_RXCB(skb);
u8 *bssid = ieee80211_get_bssid(hdr, skb->len, sdata->vif.type);
int multicast = is_multicast_ether_addr(hdr->addr1);
@@ -3263,30 +3256,23 @@ static bool prepare_for_handlers(struct ieee80211_rx_data *rx,
case NL80211_IFTYPE_STATION:
if (!bssid && !sdata->u.mgd.use_4addr)
return false;
- if (!multicast &&
- !ether_addr_equal(sdata->vif.addr, hdr->addr1)) {
- if (!(sdata->dev->flags & IFF_PROMISC) ||
- sdata->u.mgd.use_4addr)
- return false;
- status->rx_flags &= ~IEEE80211_RX_RA_MATCH;
- }
- break;
+ if (multicast)
+ return true;
+ return ether_addr_equal(sdata->vif.addr, hdr->addr1);
case NL80211_IFTYPE_ADHOC:
if (!bssid)
return false;
if (ether_addr_equal(sdata->vif.addr, hdr->addr2) ||
ether_addr_equal(sdata->u.ibss.bssid, hdr->addr2))
return false;
- if (ieee80211_is_beacon(hdr->frame_control)) {
+ if (ieee80211_is_beacon(hdr->frame_control))
return true;
- } else if (!ieee80211_bssid_match(bssid, sdata->u.ibss.bssid)) {
+ if (!ieee80211_bssid_match(bssid, sdata->u.ibss.bssid))
return false;
- } else if (!multicast &&
- !ether_addr_equal(sdata->vif.addr, hdr->addr1)) {
- if (!(sdata->dev->flags & IFF_PROMISC))
- return false;
- status->rx_flags &= ~IEEE80211_RX_RA_MATCH;
- } else if (!rx->sta) {
+ if (!multicast &&
+ !ether_addr_equal(sdata->vif.addr, hdr->addr1))
+ return false;
+ if (!rx->sta) {
int rate_idx;
if (status->flag & (RX_FLAG_HT | RX_FLAG_VHT))
rate_idx = 0; /* TODO: HT/VHT rates */
@@ -3295,25 +3281,18 @@ static bool prepare_for_handlers(struct ieee80211_rx_data *rx,
ieee80211_ibss_rx_no_sta(sdata, bssid, hdr->addr2,
BIT(rate_idx));
}
- break;
+ return true;
case NL80211_IFTYPE_OCB:
if (!bssid)
return false;
- if (ieee80211_is_beacon(hdr->frame_control)) {
+ if (ieee80211_is_beacon(hdr->frame_control))
return false;
- } else if (!is_broadcast_ether_addr(bssid)) {
- ocb_dbg(sdata, "BSSID mismatch in OCB mode!\n");
+ if (!is_broadcast_ether_addr(bssid))
return false;
- } else if (!multicast &&
- !ether_addr_equal(sdata->dev->dev_addr,
- hdr->addr1)) {
- /* if we are in promisc mode we also accept
- * packets not destined for us
- */
- if (!(sdata->dev->flags & IFF_PROMISC))
- return false;
- rx->flags &= ~IEEE80211_RX_RA_MATCH;
- } else if (!rx->sta) {
+ if (!multicast &&
+ !ether_addr_equal(sdata->dev->dev_addr, hdr->addr1))
+ return false;
+ if (!rx->sta) {
int rate_idx;
if (status->flag & RX_FLAG_HT)
rate_idx = 0; /* TODO: HT rates */
@@ -3322,22 +3301,17 @@ static bool prepare_for_handlers(struct ieee80211_rx_data *rx,
ieee80211_ocb_rx_no_sta(sdata, bssid, hdr->addr2,
BIT(rate_idx));
}
- break;
+ return true;
case NL80211_IFTYPE_MESH_POINT:
- if (!multicast &&
- !ether_addr_equal(sdata->vif.addr, hdr->addr1)) {
- if (!(sdata->dev->flags & IFF_PROMISC))
- return false;
-
- status->rx_flags &= ~IEEE80211_RX_RA_MATCH;
- }
- break;
+ if (multicast)
+ return true;
+ return ether_addr_equal(sdata->vif.addr, hdr->addr1);
case NL80211_IFTYPE_AP_VLAN:
case NL80211_IFTYPE_AP:
- if (!bssid) {
- if (!ether_addr_equal(sdata->vif.addr, hdr->addr1))
- return false;
- } else if (!ieee80211_bssid_match(bssid, sdata->vif.addr)) {
+ if (!bssid)
+ return ether_addr_equal(sdata->vif.addr, hdr->addr1);
+
+ if (!ieee80211_bssid_match(bssid, sdata->vif.addr)) {
/*
* Accept public action frames even when the
* BSSID doesn't match, this is used for P2P
@@ -3349,10 +3323,10 @@ static bool prepare_for_handlers(struct ieee80211_rx_data *rx,
return false;
if (ieee80211_is_public_action(hdr, skb->len))
return true;
- if (!ieee80211_is_beacon(hdr->frame_control))
- return false;
- status->rx_flags &= ~IEEE80211_RX_RA_MATCH;
- } else if (!ieee80211_has_tods(hdr->frame_control)) {
+ return ieee80211_is_beacon(hdr->frame_control);
+ }
+
+ if (!ieee80211_has_tods(hdr->frame_control)) {
/* ignore data frames to TDLS-peers */
if (ieee80211_is_data(hdr->frame_control))
return false;
@@ -3361,30 +3335,22 @@ static bool prepare_for_handlers(struct ieee80211_rx_data *rx,
!ether_addr_equal(bssid, hdr->addr1))
return false;
}
- break;
+ return true;
case NL80211_IFTYPE_WDS:
if (bssid || !ieee80211_is_data(hdr->frame_control))
return false;
- if (!ether_addr_equal(sdata->u.wds.remote_addr, hdr->addr2))
- return false;
- break;
+ return ether_addr_equal(sdata->u.wds.remote_addr, hdr->addr2);
case NL80211_IFTYPE_P2P_DEVICE:
- if (!ieee80211_is_public_action(hdr, skb->len) &&
- !ieee80211_is_probe_req(hdr->frame_control) &&
- !ieee80211_is_probe_resp(hdr->frame_control) &&
- !ieee80211_is_beacon(hdr->frame_control))
- return false;
- if (!ether_addr_equal(sdata->vif.addr, hdr->addr1) &&
- !multicast)
- status->rx_flags &= ~IEEE80211_RX_RA_MATCH;
- break;
+ return ieee80211_is_public_action(hdr, skb->len) ||
+ ieee80211_is_probe_req(hdr->frame_control) ||
+ ieee80211_is_probe_resp(hdr->frame_control) ||
+ ieee80211_is_beacon(hdr->frame_control);
default:
- /* should never get here */
- WARN_ON_ONCE(1);
break;
}
- return true;
+ WARN_ON_ONCE(1);
+ return false;
}
/*
@@ -3398,13 +3364,10 @@ static bool ieee80211_prepare_and_rx_handle(struct ieee80211_rx_data *rx,
{
struct ieee80211_local *local = rx->local;
struct ieee80211_sub_if_data *sdata = rx->sdata;
- struct ieee80211_rx_status *status = IEEE80211_SKB_RXCB(skb);
- struct ieee80211_hdr *hdr = (void *)skb->data;
rx->skb = skb;
- status->rx_flags |= IEEE80211_RX_RA_MATCH;
- if (!prepare_for_handlers(rx, hdr))
+ if (!ieee80211_accept_frame(rx))
return false;
if (!consume) {
@@ -3447,7 +3410,7 @@ static void __ieee80211_rx_handle_packet(struct ieee80211_hw *hw,
rx.local = local;
if (ieee80211_is_data(fc) || ieee80211_is_mgmt(fc))
- local->dot11ReceivedFragmentCount++;
+ I802_DEBUG_INC(local->dot11ReceivedFragmentCount);
if (ieee80211_is_mgmt(fc)) {
/* drop frame if too short for header */
diff --git a/net/mac80211/sta_info.c b/net/mac80211/sta_info.c
index 12971b71d0fa..ce0c1662de42 100644
--- a/net/mac80211/sta_info.c
+++ b/net/mac80211/sta_info.c
@@ -66,10 +66,12 @@
static const struct rhashtable_params sta_rht_params = {
.nelem_hint = 3, /* start small */
+ .automatic_shrinking = true,
.head_offset = offsetof(struct sta_info, hash_node),
.key_offset = offsetof(struct sta_info, sta.addr),
.key_len = ETH_ALEN,
.hashfn = sta_addr_hash,
+ .max_size = CONFIG_MAC80211_STA_HASH_MAX_SIZE,
};
/* Caller must hold local->sta_mtx */
@@ -157,8 +159,24 @@ struct sta_info *sta_info_get(struct ieee80211_sub_if_data *sdata,
const u8 *addr)
{
struct ieee80211_local *local = sdata->local;
+ struct sta_info *sta;
+ struct rhash_head *tmp;
+ const struct bucket_table *tbl;
- return rhashtable_lookup_fast(&local->sta_hash, addr, sta_rht_params);
+ rcu_read_lock();
+ tbl = rht_dereference_rcu(local->sta_hash.tbl, &local->sta_hash);
+
+ for_each_sta_info(local, tbl, addr, sta, tmp) {
+ if (sta->sdata == sdata) {
+ rcu_read_unlock();
+ /* this is safe as the caller must already hold
+ * another rcu read section or the mutex
+ */
+ return sta;
+ }
+ }
+ rcu_read_unlock();
+ return NULL;
}
/*
@@ -269,7 +287,7 @@ static int sta_prepare_rate_control(struct ieee80211_local *local,
sta->rate_ctrl = local->rate_ctrl;
sta->rate_ctrl_priv = rate_control_alloc_sta(sta->rate_ctrl,
- &sta->sta, gfp);
+ sta, gfp);
if (!sta->rate_ctrl_priv)
return -ENOMEM;
@@ -295,6 +313,7 @@ struct sta_info *sta_info_alloc(struct ieee80211_sub_if_data *sdata,
INIT_WORK(&sta->ampdu_mlme.work, ieee80211_ba_session_work);
mutex_init(&sta->ampdu_mlme.mtx);
#ifdef CONFIG_MAC80211_MESH
+ spin_lock_init(&sta->plink_lock);
if (ieee80211_vif_is_mesh(&sdata->vif) &&
!sdata->u.mesh.user_mpm)
init_timer(&sta->plink_timer);
@@ -1200,6 +1219,8 @@ void ieee80211_sta_ps_deliver_wakeup(struct sta_info *sta)
ps_dbg(sdata,
"STA %pM aid %d sending %d filtered/%d PS frames since STA not sleeping anymore\n",
sta->sta.addr, sta->sta.aid, filtered, buffered);
+
+ ieee80211_check_fast_xmit(sta);
}
static void ieee80211_send_null_response(struct ieee80211_sub_if_data *sdata,
@@ -1598,6 +1619,7 @@ void ieee80211_sta_block_awake(struct ieee80211_hw *hw,
if (block) {
set_sta_flag(sta, WLAN_STA_PS_DRIVER);
+ ieee80211_clear_fast_xmit(sta);
return;
}
@@ -1615,6 +1637,7 @@ void ieee80211_sta_block_awake(struct ieee80211_hw *hw,
ieee80211_queue_work(hw, &sta->drv_deliver_wk);
} else {
clear_sta_flag(sta, WLAN_STA_PS_DRIVER);
+ ieee80211_check_fast_xmit(sta);
}
}
EXPORT_SYMBOL(ieee80211_sta_block_awake);
@@ -1719,6 +1742,7 @@ int sta_info_move_state(struct sta_info *sta,
!sta->sdata->u.vlan.sta))
atomic_dec(&sta->sdata->bss->num_mcast_sta);
clear_bit(WLAN_STA_AUTHORIZED, &sta->_flags);
+ ieee80211_clear_fast_xmit(sta);
}
break;
case IEEE80211_STA_AUTHORIZED:
@@ -1728,6 +1752,7 @@ int sta_info_move_state(struct sta_info *sta,
!sta->sdata->u.vlan.sta))
atomic_inc(&sta->sdata->bss->num_mcast_sta);
set_bit(WLAN_STA_AUTHORIZED, &sta->_flags);
+ ieee80211_check_fast_xmit(sta);
}
break;
default:
diff --git a/net/mac80211/sta_info.h b/net/mac80211/sta_info.h
index 5c164fb3f6c5..9bd1e97876bd 100644
--- a/net/mac80211/sta_info.h
+++ b/net/mac80211/sta_info.h
@@ -241,6 +241,34 @@ struct sta_ampdu_mlme {
/* Value to indicate no TID reservation */
#define IEEE80211_TID_UNRESERVED 0xff
+#define IEEE80211_FAST_XMIT_MAX_IV 18
+
+/**
+ * struct ieee80211_fast_tx - TX fastpath information
+ * @key: key to use for hw crypto
+ * @hdr: the 802.11 header to put with the frame
+ * @hdr_len: actual 802.11 header length
+ * @sa_offs: offset of the SA
+ * @da_offs: offset of the DA
+ * @pn_offs: offset where to put PN for crypto (or 0 if not needed)
+ * @band: band this will be transmitted on, for tx_info
+ * @rcu_head: RCU head to free this struct
+ *
+ * This struct is small enough so that the common case (maximum crypto
+ * header length of 8 like for CCMP/GCMP) fits into a single 64-byte
+ * cache line.
+ */
+struct ieee80211_fast_tx {
+ struct ieee80211_key *key;
+ u8 hdr_len;
+ u8 sa_offs, da_offs, pn_offs;
+ u8 band;
+ u8 hdr[30 + 2 + IEEE80211_FAST_XMIT_MAX_IV +
+ sizeof(rfc1042_header)];
+
+ struct rcu_head rcu_head;
+};
+
/**
* struct sta_info - STA information
*
@@ -257,6 +285,8 @@ struct sta_ampdu_mlme {
* @gtk: group keys negotiated with this station, if any
* @gtk_idx: last installed group key index
* @rate_ctrl: rate control algorithm reference
+ * @rate_ctrl_lock: spinlock used to protect rate control data
+ * (data inside the algorithm, so serializes calls there)
* @rate_ctrl_priv: rate control private per-STA pointer
* @last_tx_rate: rate used for last transmit, to report to userspace as
* "the" transmit rate
@@ -295,10 +325,10 @@ struct sta_ampdu_mlme {
* @fail_avg: moving percentage of failed MSDUs
* @tx_packets: number of RX/TX MSDUs
* @tx_bytes: number of bytes transmitted to this STA
- * @tx_fragments: number of transmitted MPDUs
* @tid_seq: per-TID sequence numbers for sending to this STA
* @ampdu_mlme: A-MPDU state machine state
* @timer_to_tid: identity mapping to ID timers
+ * @plink_lock: serialize access to plink fields
* @llid: Local link ID
* @plid: Peer link ID
* @reason: Cancel reason on PLINK_HOLDING state
@@ -338,6 +368,7 @@ struct sta_ampdu_mlme {
* using IEEE80211_NUM_TID entry for non-QoS frames
* @rx_msdu: MSDUs received from this station, using IEEE80211_NUM_TID
* entry for non-QoS frames
+ * @fast_tx: TX fastpath information
*/
struct sta_info {
/* General information, mostly static */
@@ -352,8 +383,11 @@ struct sta_info {
u8 ptk_idx;
struct rate_control_ref *rate_ctrl;
void *rate_ctrl_priv;
+ spinlock_t rate_ctrl_lock;
spinlock_t lock;
+ struct ieee80211_fast_tx __rcu *fast_tx;
+
struct work_struct drv_deliver_wk;
u16 listen_interval;
@@ -400,7 +434,6 @@ struct sta_info {
unsigned int fail_avg;
/* Updated from TX path only, no locking requirements */
- u32 tx_fragments;
u64 tx_packets[IEEE80211_NUM_ACS];
u64 tx_bytes[IEEE80211_NUM_ACS];
struct ieee80211_tx_rate last_tx_rate;
@@ -422,9 +455,10 @@ struct sta_info {
#ifdef CONFIG_MAC80211_MESH
/*
- * Mesh peer link attributes
+ * Mesh peer link attributes, protected by plink_lock.
* TODO: move to a sub-structure that is referenced with pointer?
*/
+ spinlock_t plink_lock;
u16 llid;
u16 plid;
u16 reason;
@@ -432,6 +466,7 @@ struct sta_info {
enum nl80211_plink_state plink_state;
u32 plink_timeout;
struct timer_list plink_timer;
+
s64 t_offset;
s64 t_offset_setpoint;
/* mesh power save */
diff --git a/net/mac80211/status.c b/net/mac80211/status.c
index 005fdbe39a8b..461594966b65 100644
--- a/net/mac80211/status.c
+++ b/net/mac80211/status.c
@@ -631,15 +631,15 @@ void ieee80211_tx_status_noskb(struct ieee80211_hw *hw,
}
if (acked || noack_success) {
- local->dot11TransmittedFrameCount++;
- if (!pubsta)
- local->dot11MulticastTransmittedFrameCount++;
- if (retry_count > 0)
- local->dot11RetryCount++;
- if (retry_count > 1)
- local->dot11MultipleRetryCount++;
+ I802_DEBUG_INC(local->dot11TransmittedFrameCount);
+ if (!pubsta)
+ I802_DEBUG_INC(local->dot11MulticastTransmittedFrameCount);
+ if (retry_count > 0)
+ I802_DEBUG_INC(local->dot11RetryCount);
+ if (retry_count > 1)
+ I802_DEBUG_INC(local->dot11MultipleRetryCount);
} else {
- local->dot11FailedCount++;
+ I802_DEBUG_INC(local->dot11FailedCount);
}
}
EXPORT_SYMBOL(ieee80211_tx_status_noskb);
@@ -802,13 +802,13 @@ void ieee80211_tx_status(struct ieee80211_hw *hw, struct sk_buff *skb)
if ((info->flags & IEEE80211_TX_STAT_ACK) ||
(info->flags & IEEE80211_TX_STAT_NOACK_TRANSMITTED)) {
if (ieee80211_is_first_frag(hdr->seq_ctrl)) {
- local->dot11TransmittedFrameCount++;
+ I802_DEBUG_INC(local->dot11TransmittedFrameCount);
if (is_multicast_ether_addr(ieee80211_get_DA(hdr)))
- local->dot11MulticastTransmittedFrameCount++;
+ I802_DEBUG_INC(local->dot11MulticastTransmittedFrameCount);
if (retry_count > 0)
- local->dot11RetryCount++;
+ I802_DEBUG_INC(local->dot11RetryCount);
if (retry_count > 1)
- local->dot11MultipleRetryCount++;
+ I802_DEBUG_INC(local->dot11MultipleRetryCount);
}
/* This counter shall be incremented for an acknowledged MPDU
@@ -818,10 +818,10 @@ void ieee80211_tx_status(struct ieee80211_hw *hw, struct sk_buff *skb)
if (!is_multicast_ether_addr(hdr->addr1) ||
ieee80211_is_data(fc) ||
ieee80211_is_mgmt(fc))
- local->dot11TransmittedFragmentCount++;
+ I802_DEBUG_INC(local->dot11TransmittedFragmentCount);
} else {
if (ieee80211_is_first_frag(hdr->seq_ctrl))
- local->dot11FailedCount++;
+ I802_DEBUG_INC(local->dot11FailedCount);
}
if (ieee80211_is_nullfunc(fc) && ieee80211_has_pm(fc) &&
diff --git a/net/mac80211/tdls.c b/net/mac80211/tdls.c
index fff0d864adfa..8a92a920ff17 100644
--- a/net/mac80211/tdls.c
+++ b/net/mac80211/tdls.c
@@ -527,30 +527,19 @@ ieee80211_tdls_add_setup_cfm_ies(struct ieee80211_sub_if_data *sdata,
/* if HT support is only added in TDLS, we need an HT-operation IE */
if (!ap_sta->sta.ht_cap.ht_supported && sta->sta.ht_cap.ht_supported) {
- struct ieee80211_chanctx_conf *chanctx_conf =
- rcu_dereference(sdata->vif.chanctx_conf);
- if (!WARN_ON(!chanctx_conf)) {
- pos = skb_put(skb, 2 +
- sizeof(struct ieee80211_ht_operation));
- /* send an empty HT operation IE */
- ieee80211_ie_build_ht_oper(pos, &sta->sta.ht_cap,
- &chanctx_conf->def, 0);
- }
+ pos = skb_put(skb, 2 + sizeof(struct ieee80211_ht_operation));
+ /* send an empty HT operation IE */
+ ieee80211_ie_build_ht_oper(pos, &sta->sta.ht_cap,
+ &sdata->vif.bss_conf.chandef, 0);
}
ieee80211_tdls_add_link_ie(sdata, skb, peer, initiator);
/* only include VHT-operation if not on the 2.4GHz band */
- if (band != IEEE80211_BAND_2GHZ && !ap_sta->sta.vht_cap.vht_supported &&
- sta->sta.vht_cap.vht_supported) {
- struct ieee80211_chanctx_conf *chanctx_conf =
- rcu_dereference(sdata->vif.chanctx_conf);
- if (!WARN_ON(!chanctx_conf)) {
- pos = skb_put(skb, 2 +
- sizeof(struct ieee80211_vht_operation));
- ieee80211_ie_build_vht_oper(pos, &sta->sta.vht_cap,
- &chanctx_conf->def);
- }
+ if (band != IEEE80211_BAND_2GHZ && sta->sta.vht_cap.vht_supported) {
+ pos = skb_put(skb, 2 + sizeof(struct ieee80211_vht_operation));
+ ieee80211_ie_build_vht_oper(pos, &sta->sta.vht_cap,
+ &sdata->vif.bss_conf.chandef);
}
rcu_read_unlock();
diff --git a/net/mac80211/trace.h b/net/mac80211/trace.h
index 4c2e7690226a..6f14591d8ca9 100644
--- a/net/mac80211/trace.h
+++ b/net/mac80211/trace.h
@@ -69,6 +69,17 @@
#define CHANCTX_PR_ARG CHANDEF_PR_ARG, MIN_CHANDEF_PR_ARG, \
__entry->rx_chains_static, __entry->rx_chains_dynamic
+#define KEY_ENTRY __field(u32, cipher) \
+ __field(u8, hw_key_idx) \
+ __field(u8, flags) \
+ __field(s8, keyidx)
+#define KEY_ASSIGN(k) __entry->cipher = (k)->cipher; \
+ __entry->flags = (k)->flags; \
+ __entry->keyidx = (k)->keyidx; \
+ __entry->hw_key_idx = (k)->hw_key_idx;
+#define KEY_PR_FMT " cipher:0x%x, flags=%#x, keyidx=%d, hw_key_idx=%d"
+#define KEY_PR_ARG __entry->cipher, __entry->flags, __entry->keyidx, __entry->hw_key_idx
+
/*
@@ -522,25 +533,19 @@ TRACE_EVENT(drv_set_key,
LOCAL_ENTRY
VIF_ENTRY
STA_ENTRY
- __field(u32, cipher)
- __field(u8, hw_key_idx)
- __field(u8, flags)
- __field(s8, keyidx)
+ KEY_ENTRY
),
TP_fast_assign(
LOCAL_ASSIGN;
VIF_ASSIGN;
STA_ASSIGN;
- __entry->cipher = key->cipher;
- __entry->flags = key->flags;
- __entry->keyidx = key->keyidx;
- __entry->hw_key_idx = key->hw_key_idx;
+ KEY_ASSIGN(key);
),
TP_printk(
- LOCAL_PR_FMT VIF_PR_FMT STA_PR_FMT,
- LOCAL_PR_ARG, VIF_PR_ARG, STA_PR_ARG
+ LOCAL_PR_FMT VIF_PR_FMT STA_PR_FMT KEY_PR_FMT,
+ LOCAL_PR_ARG, VIF_PR_ARG, STA_PR_ARG, KEY_PR_ARG
)
);
@@ -656,28 +661,25 @@ TRACE_EVENT(drv_get_stats,
)
);
-TRACE_EVENT(drv_get_tkip_seq,
+TRACE_EVENT(drv_get_key_seq,
TP_PROTO(struct ieee80211_local *local,
- u8 hw_key_idx, u32 *iv32, u16 *iv16),
+ struct ieee80211_key_conf *key),
- TP_ARGS(local, hw_key_idx, iv32, iv16),
+ TP_ARGS(local, key),
TP_STRUCT__entry(
LOCAL_ENTRY
- __field(u8, hw_key_idx)
- __field(u32, iv32)
- __field(u16, iv16)
+ KEY_ENTRY
),
TP_fast_assign(
LOCAL_ASSIGN;
- __entry->hw_key_idx = hw_key_idx;
- __entry->iv32 = *iv32;
- __entry->iv16 = *iv16;
+ KEY_ASSIGN(key);
),
TP_printk(
- LOCAL_PR_FMT, LOCAL_PR_ARG
+ LOCAL_PR_FMT KEY_PR_FMT,
+ LOCAL_PR_ARG, KEY_PR_ARG
)
);
diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c
index 667111ee6a20..8df134213adf 100644
--- a/net/mac80211/tx.c
+++ b/net/mac80211/tx.c
@@ -37,6 +37,16 @@
/* misc utils */
+static inline void ieee80211_tx_stats(struct net_device *dev, u32 len)
+{
+ struct pcpu_sw_netstats *tstats = this_cpu_ptr(dev->tstats);
+
+ u64_stats_update_begin(&tstats->syncp);
+ tstats->tx_packets++;
+ tstats->tx_bytes += len;
+ u64_stats_update_end(&tstats->syncp);
+}
+
static __le16 ieee80211_duration(struct ieee80211_tx_data *tx,
struct sk_buff *skb, int group_addr,
int next_frag_len)
@@ -987,7 +997,6 @@ ieee80211_tx_h_stats(struct ieee80211_tx_data *tx)
skb_queue_walk(&tx->skbs, skb) {
ac = skb_get_queue_mapping(skb);
- tx->sta->tx_fragments++;
tx->sta->tx_bytes[ac] += skb->len;
}
if (ac >= 0)
@@ -1600,7 +1609,7 @@ static int ieee80211_skb_resize(struct ieee80211_sub_if_data *sdata,
if (skb_cloned(skb) &&
(!(local->hw.flags & IEEE80211_HW_SUPPORTS_CLONED_SKBS) ||
!skb_clone_writable(skb, ETH_HLEN) ||
- sdata->crypto_tx_tailroom_needed_cnt))
+ (may_encrypt && sdata->crypto_tx_tailroom_needed_cnt)))
I802_DEBUG_INC(local->tx_expand_skb_head_cloned);
else if (head_need || tail_need)
I802_DEBUG_INC(local->tx_expand_skb_head);
@@ -2387,12 +2396,460 @@ static struct sk_buff *ieee80211_build_hdr(struct ieee80211_sub_if_data *sdata,
return ERR_PTR(ret);
}
+/*
+ * fast-xmit overview
+ *
+ * The core idea of this fast-xmit is to remove per-packet checks by checking
+ * them out of band. ieee80211_check_fast_xmit() implements the out-of-band
+ * checks that are needed to get the sta->fast_tx pointer assigned, after which
+ * much less work can be done per packet. For example, fragmentation must be
+ * disabled or the fast_tx pointer will not be set. All the conditions are seen
+ * in the code here.
+ *
+ * Once assigned, the fast_tx data structure also caches the per-packet 802.11
+ * header and other data to aid packet processing in ieee80211_xmit_fast().
+ *
+ * The most difficult part of this is that when any of these assumptions
+ * change, an external trigger (i.e. a call to ieee80211_clear_fast_xmit(),
+ * ieee80211_check_fast_xmit() or friends) is required to reset the data,
+ * since the per-packet code no longer checks the conditions. This is reflected
+ * by the calls to these functions throughout the rest of the code, and must be
+ * maintained if any of the TX path checks change.
+ */
+
+void ieee80211_check_fast_xmit(struct sta_info *sta)
+{
+ struct ieee80211_fast_tx build = {}, *fast_tx = NULL, *old;
+ struct ieee80211_local *local = sta->local;
+ struct ieee80211_sub_if_data *sdata = sta->sdata;
+ struct ieee80211_hdr *hdr = (void *)build.hdr;
+ struct ieee80211_chanctx_conf *chanctx_conf;
+ __le16 fc;
+
+ if (!(local->hw.flags & IEEE80211_HW_SUPPORT_FAST_XMIT))
+ return;
+
+ /* Locking here protects both the pointer itself, and against concurrent
+ * invocations winning data access races to, e.g., the key pointer that
+ * is used.
+ * Without it, the invocation of this function right after the key
+ * pointer changes wouldn't be sufficient, as another CPU could access
+ * the pointer, then stall, and then do the cache update after the CPU
+ * that invalidated the key.
+ * With the locking, such scenarios cannot happen as the check for the
+ * key and the fast-tx assignment are done atomically, so the CPU that
+ * modifies the key will either wait or other one will see the key
+ * cleared/changed already.
+ */
+ spin_lock_bh(&sta->lock);
+ if (local->hw.flags & IEEE80211_HW_SUPPORTS_PS &&
+ !(local->hw.flags & IEEE80211_HW_SUPPORTS_DYNAMIC_PS) &&
+ sdata->vif.type == NL80211_IFTYPE_STATION)
+ goto out;
+
+ if (!test_sta_flag(sta, WLAN_STA_AUTHORIZED))
+ goto out;
+
+ if (test_sta_flag(sta, WLAN_STA_PS_STA) ||
+ test_sta_flag(sta, WLAN_STA_PS_DRIVER) ||
+ test_sta_flag(sta, WLAN_STA_PS_DELIVER))
+ goto out;
+
+ if (sdata->noack_map)
+ goto out;
+
+ /* fast-xmit doesn't handle fragmentation at all */
+ if (local->hw.wiphy->frag_threshold != (u32)-1 &&
+ !local->ops->set_frag_threshold)
+ goto out;
+
+ rcu_read_lock();
+ chanctx_conf = rcu_dereference(sdata->vif.chanctx_conf);
+ if (!chanctx_conf) {
+ rcu_read_unlock();
+ goto out;
+ }
+ build.band = chanctx_conf->def.chan->band;
+ rcu_read_unlock();
+
+ fc = cpu_to_le16(IEEE80211_FTYPE_DATA | IEEE80211_STYPE_DATA);
+
+ switch (sdata->vif.type) {
+ case NL80211_IFTYPE_ADHOC:
+ /* DA SA BSSID */
+ build.da_offs = offsetof(struct ieee80211_hdr, addr1);
+ build.sa_offs = offsetof(struct ieee80211_hdr, addr2);
+ memcpy(hdr->addr3, sdata->u.ibss.bssid, ETH_ALEN);
+ build.hdr_len = 24;
+ break;
+ case NL80211_IFTYPE_STATION:
+ if (test_sta_flag(sta, WLAN_STA_TDLS_PEER)) {
+ /* DA SA BSSID */
+ build.da_offs = offsetof(struct ieee80211_hdr, addr1);
+ build.sa_offs = offsetof(struct ieee80211_hdr, addr2);
+ memcpy(hdr->addr3, sdata->u.mgd.bssid, ETH_ALEN);
+ build.hdr_len = 24;
+ break;
+ }
+
+ if (sdata->u.mgd.use_4addr) {
+ /* non-regular ethertype cannot use the fastpath */
+ fc |= cpu_to_le16(IEEE80211_FCTL_FROMDS |
+ IEEE80211_FCTL_TODS);
+ /* RA TA DA SA */
+ memcpy(hdr->addr1, sdata->u.mgd.bssid, ETH_ALEN);
+ memcpy(hdr->addr2, sdata->vif.addr, ETH_ALEN);
+ build.da_offs = offsetof(struct ieee80211_hdr, addr3);
+ build.sa_offs = offsetof(struct ieee80211_hdr, addr4);
+ build.hdr_len = 30;
+ break;
+ }
+ fc |= cpu_to_le16(IEEE80211_FCTL_TODS);
+ /* BSSID SA DA */
+ memcpy(hdr->addr1, sdata->u.mgd.bssid, ETH_ALEN);
+ build.da_offs = offsetof(struct ieee80211_hdr, addr3);
+ build.sa_offs = offsetof(struct ieee80211_hdr, addr2);
+ build.hdr_len = 24;
+ break;
+ case NL80211_IFTYPE_AP_VLAN:
+ if (sdata->wdev.use_4addr) {
+ fc |= cpu_to_le16(IEEE80211_FCTL_FROMDS |
+ IEEE80211_FCTL_TODS);
+ /* RA TA DA SA */
+ memcpy(hdr->addr1, sta->sta.addr, ETH_ALEN);
+ memcpy(hdr->addr2, sdata->vif.addr, ETH_ALEN);
+ build.da_offs = offsetof(struct ieee80211_hdr, addr3);
+ build.sa_offs = offsetof(struct ieee80211_hdr, addr4);
+ build.hdr_len = 30;
+ break;
+ }
+ /* fall through */
+ case NL80211_IFTYPE_AP:
+ fc |= cpu_to_le16(IEEE80211_FCTL_FROMDS);
+ /* DA BSSID SA */
+ build.da_offs = offsetof(struct ieee80211_hdr, addr1);
+ memcpy(hdr->addr2, sdata->vif.addr, ETH_ALEN);
+ build.sa_offs = offsetof(struct ieee80211_hdr, addr3);
+ build.hdr_len = 24;
+ break;
+ default:
+ /* not handled on fast-xmit */
+ goto out;
+ }
+
+ if (sta->sta.wme) {
+ build.hdr_len += 2;
+ fc |= cpu_to_le16(IEEE80211_STYPE_QOS_DATA);
+ }
+
+ /* We store the key here so there's no point in using rcu_dereference()
+ * but that's fine because the code that changes the pointers will call
+ * this function after doing so. For a single CPU that would be enough,
+ * for multiple see the comment above.
+ */
+ build.key = rcu_access_pointer(sta->ptk[sta->ptk_idx]);
+ if (!build.key)
+ build.key = rcu_access_pointer(sdata->default_unicast_key);
+ if (build.key) {
+ bool gen_iv, iv_spc, mmic;
+
+ gen_iv = build.key->conf.flags & IEEE80211_KEY_FLAG_GENERATE_IV;
+ iv_spc = build.key->conf.flags & IEEE80211_KEY_FLAG_PUT_IV_SPACE;
+ mmic = build.key->conf.flags & IEEE80211_KEY_FLAG_GENERATE_MMIC;
+
+ /* don't handle software crypto */
+ if (!(build.key->flags & KEY_FLAG_UPLOADED_TO_HARDWARE))
+ goto out;
+
+ switch (build.key->conf.cipher) {
+ case WLAN_CIPHER_SUITE_CCMP:
+ case WLAN_CIPHER_SUITE_CCMP_256:
+ /* add fixed key ID */
+ if (gen_iv) {
+ (build.hdr + build.hdr_len)[3] =
+ 0x20 | (build.key->conf.keyidx << 6);
+ build.pn_offs = build.hdr_len;
+ }
+ if (gen_iv || iv_spc)
+ build.hdr_len += IEEE80211_CCMP_HDR_LEN;
+ break;
+ case WLAN_CIPHER_SUITE_GCMP:
+ case WLAN_CIPHER_SUITE_GCMP_256:
+ /* add fixed key ID */
+ if (gen_iv) {
+ (build.hdr + build.hdr_len)[3] =
+ 0x20 | (build.key->conf.keyidx << 6);
+ build.pn_offs = build.hdr_len;
+ }
+ if (gen_iv || iv_spc)
+ build.hdr_len += IEEE80211_GCMP_HDR_LEN;
+ break;
+ case WLAN_CIPHER_SUITE_TKIP:
+ /* cannot handle MMIC or IV generation in xmit-fast */
+ if (mmic || gen_iv)
+ goto out;
+ if (iv_spc)
+ build.hdr_len += IEEE80211_TKIP_IV_LEN;
+ break;
+ case WLAN_CIPHER_SUITE_WEP40:
+ case WLAN_CIPHER_SUITE_WEP104:
+ /* cannot handle IV generation in fast-xmit */
+ if (gen_iv)
+ goto out;
+ if (iv_spc)
+ build.hdr_len += IEEE80211_WEP_IV_LEN;
+ break;
+ case WLAN_CIPHER_SUITE_AES_CMAC:
+ case WLAN_CIPHER_SUITE_BIP_CMAC_256:
+ case WLAN_CIPHER_SUITE_BIP_GMAC_128:
+ case WLAN_CIPHER_SUITE_BIP_GMAC_256:
+ WARN(1,
+ "management cipher suite 0x%x enabled for data\n",
+ build.key->conf.cipher);
+ goto out;
+ default:
+ /* we don't know how to generate IVs for this at all */
+ if (WARN_ON(gen_iv))
+ goto out;
+ /* pure hardware keys are OK, of course */
+ if (!(build.key->flags & KEY_FLAG_CIPHER_SCHEME))
+ break;
+ /* cipher scheme might require space allocation */
+ if (iv_spc &&
+ build.key->conf.iv_len > IEEE80211_FAST_XMIT_MAX_IV)
+ goto out;
+ if (iv_spc)
+ build.hdr_len += build.key->conf.iv_len;
+ }
+
+ fc |= cpu_to_le16(IEEE80211_FCTL_PROTECTED);
+ }
+
+ hdr->frame_control = fc;
+
+ memcpy(build.hdr + build.hdr_len,
+ rfc1042_header, sizeof(rfc1042_header));
+ build.hdr_len += sizeof(rfc1042_header);
+
+ fast_tx = kmemdup(&build, sizeof(build), GFP_ATOMIC);
+ /* if the kmemdup fails, continue w/o fast_tx */
+ if (!fast_tx)
+ goto out;
+
+ out:
+ /* we might have raced against another call to this function */
+ old = rcu_dereference_protected(sta->fast_tx,
+ lockdep_is_held(&sta->lock));
+ rcu_assign_pointer(sta->fast_tx, fast_tx);
+ if (old)
+ kfree_rcu(old, rcu_head);
+ spin_unlock_bh(&sta->lock);
+}
+
+void ieee80211_check_fast_xmit_all(struct ieee80211_local *local)
+{
+ struct sta_info *sta;
+
+ rcu_read_lock();
+ list_for_each_entry_rcu(sta, &local->sta_list, list)
+ ieee80211_check_fast_xmit(sta);
+ rcu_read_unlock();
+}
+
+void ieee80211_check_fast_xmit_iface(struct ieee80211_sub_if_data *sdata)
+{
+ struct ieee80211_local *local = sdata->local;
+ struct sta_info *sta;
+
+ rcu_read_lock();
+
+ list_for_each_entry_rcu(sta, &local->sta_list, list) {
+ if (sdata != sta->sdata &&
+ (!sta->sdata->bss || sta->sdata->bss != sdata->bss))
+ continue;
+ ieee80211_check_fast_xmit(sta);
+ }
+
+ rcu_read_unlock();
+}
+
+void ieee80211_clear_fast_xmit(struct sta_info *sta)
+{
+ struct ieee80211_fast_tx *fast_tx;
+
+ spin_lock_bh(&sta->lock);
+ fast_tx = rcu_dereference_protected(sta->fast_tx,
+ lockdep_is_held(&sta->lock));
+ RCU_INIT_POINTER(sta->fast_tx, NULL);
+ spin_unlock_bh(&sta->lock);
+
+ if (fast_tx)
+ kfree_rcu(fast_tx, rcu_head);
+}
+
+static bool ieee80211_xmit_fast(struct ieee80211_sub_if_data *sdata,
+ struct net_device *dev, struct sta_info *sta,
+ struct ieee80211_fast_tx *fast_tx,
+ struct sk_buff *skb)
+{
+ struct ieee80211_local *local = sdata->local;
+ u16 ethertype = (skb->data[12] << 8) | skb->data[13];
+ int extra_head = fast_tx->hdr_len - (ETH_HLEN - 2);
+ int hw_headroom = sdata->local->hw.extra_tx_headroom;
+ struct ethhdr eth;
+ struct ieee80211_tx_info *info = IEEE80211_SKB_CB(skb);
+ struct ieee80211_hdr *hdr = (void *)fast_tx->hdr;
+ struct ieee80211_tx_data tx;
+ ieee80211_tx_result r;
+ struct tid_ampdu_tx *tid_tx = NULL;
+ u8 tid = IEEE80211_NUM_TIDS;
+
+ /* control port protocol needs a lot of special handling */
+ if (cpu_to_be16(ethertype) == sdata->control_port_protocol)
+ return false;
+
+ /* only RFC 1042 SNAP */
+ if (ethertype < ETH_P_802_3_MIN)
+ return false;
+
+ /* don't handle TX status request here either */
+ if (skb->sk && skb_shinfo(skb)->tx_flags & SKBTX_WIFI_STATUS)
+ return false;
+
+ if (hdr->frame_control & cpu_to_le16(IEEE80211_STYPE_QOS_DATA)) {
+ tid = skb->priority & IEEE80211_QOS_CTL_TAG1D_MASK;
+ tid_tx = rcu_dereference(sta->ampdu_mlme.tid_tx[tid]);
+ if (tid_tx &&
+ !test_bit(HT_AGG_STATE_OPERATIONAL, &tid_tx->state))
+ return false;
+ }
+
+ /* after this point (skb is modified) we cannot return false */
+
+ if (skb_shared(skb)) {
+ struct sk_buff *tmp_skb = skb;
+
+ skb = skb_clone(skb, GFP_ATOMIC);
+ kfree_skb(tmp_skb);
+
+ if (!skb)
+ return true;
+ }
+
+ ieee80211_tx_stats(dev, skb->len + extra_head);
+
+ /* will not be crypto-handled beyond what we do here, so use false
+ * as the may-encrypt argument for the resize to not account for
+ * more room than we already have in 'extra_head'
+ */
+ if (unlikely(ieee80211_skb_resize(sdata, skb,
+ max_t(int, extra_head + hw_headroom -
+ skb_headroom(skb), 0),
+ false))) {
+ kfree_skb(skb);
+ return true;
+ }
+
+ memcpy(&eth, skb->data, ETH_HLEN - 2);
+ hdr = (void *)skb_push(skb, extra_head);
+ memcpy(skb->data, fast_tx->hdr, fast_tx->hdr_len);
+ memcpy(skb->data + fast_tx->da_offs, eth.h_dest, ETH_ALEN);
+ memcpy(skb->data + fast_tx->sa_offs, eth.h_source, ETH_ALEN);
+
+ memset(info, 0, sizeof(*info));
+ info->band = fast_tx->band;
+ info->control.vif = &sdata->vif;
+ info->flags = IEEE80211_TX_CTL_FIRST_FRAGMENT |
+ IEEE80211_TX_CTL_DONTFRAG |
+ (tid_tx ? IEEE80211_TX_CTL_AMPDU : 0);
+
+ if (hdr->frame_control & cpu_to_le16(IEEE80211_STYPE_QOS_DATA)) {
+ *ieee80211_get_qos_ctl(hdr) = tid;
+ hdr->seq_ctrl = ieee80211_tx_next_seq(sta, tid);
+ } else {
+ info->flags |= IEEE80211_TX_CTL_ASSIGN_SEQ;
+ hdr->seq_ctrl = cpu_to_le16(sdata->sequence_number);
+ sdata->sequence_number += 0x10;
+ }
+
+ sta->tx_msdu[tid]++;
+
+ info->hw_queue = sdata->vif.hw_queue[skb_get_queue_mapping(skb)];
+
+ __skb_queue_head_init(&tx.skbs);
+
+ tx.flags = IEEE80211_TX_UNICAST;
+ tx.local = local;
+ tx.sdata = sdata;
+ tx.sta = sta;
+ tx.key = fast_tx->key;
+
+ if (fast_tx->key)
+ info->control.hw_key = &fast_tx->key->conf;
+
+ if (!(local->hw.flags & IEEE80211_HW_HAS_RATE_CONTROL)) {
+ tx.skb = skb;
+ r = ieee80211_tx_h_rate_ctrl(&tx);
+ skb = tx.skb;
+ tx.skb = NULL;
+
+ if (r != TX_CONTINUE) {
+ if (r != TX_QUEUED)
+ kfree_skb(skb);
+ return true;
+ }
+ }
+
+ /* statistics normally done by ieee80211_tx_h_stats (but that
+ * has to consider fragmentation, so is more complex)
+ */
+ sta->tx_bytes[skb_get_queue_mapping(skb)] += skb->len;
+ sta->tx_packets[skb_get_queue_mapping(skb)]++;
+
+ if (fast_tx->pn_offs) {
+ u64 pn;
+ u8 *crypto_hdr = skb->data + fast_tx->pn_offs;
+
+ switch (fast_tx->key->conf.cipher) {
+ case WLAN_CIPHER_SUITE_CCMP:
+ case WLAN_CIPHER_SUITE_CCMP_256:
+ pn = atomic64_inc_return(&fast_tx->key->u.ccmp.tx_pn);
+ crypto_hdr[0] = pn;
+ crypto_hdr[1] = pn >> 8;
+ crypto_hdr[4] = pn >> 16;
+ crypto_hdr[5] = pn >> 24;
+ crypto_hdr[6] = pn >> 32;
+ crypto_hdr[7] = pn >> 40;
+ break;
+ case WLAN_CIPHER_SUITE_GCMP:
+ case WLAN_CIPHER_SUITE_GCMP_256:
+ pn = atomic64_inc_return(&fast_tx->key->u.gcmp.tx_pn);
+ crypto_hdr[0] = pn;
+ crypto_hdr[1] = pn >> 8;
+ crypto_hdr[4] = pn >> 16;
+ crypto_hdr[5] = pn >> 24;
+ crypto_hdr[6] = pn >> 32;
+ crypto_hdr[7] = pn >> 40;
+ break;
+ }
+ }
+
+ if (sdata->vif.type == NL80211_IFTYPE_AP_VLAN)
+ sdata = container_of(sdata->bss,
+ struct ieee80211_sub_if_data, u.ap);
+
+ __skb_queue_tail(&tx.skbs, skb);
+ ieee80211_tx_frags(local, &sdata->vif, &sta->sta, &tx.skbs, false);
+ return true;
+}
+
void __ieee80211_subif_start_xmit(struct sk_buff *skb,
struct net_device *dev,
u32 info_flags)
{
struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev);
struct sta_info *sta;
+ struct sk_buff *next;
if (unlikely(skb->len < ETH_HLEN)) {
kfree_skb(skb);
@@ -2401,20 +2858,67 @@ void __ieee80211_subif_start_xmit(struct sk_buff *skb,
rcu_read_lock();
- if (ieee80211_lookup_ra_sta(sdata, skb, &sta)) {
- kfree_skb(skb);
- goto out;
+ if (ieee80211_lookup_ra_sta(sdata, skb, &sta))
+ goto out_free;
+
+ if (!IS_ERR_OR_NULL(sta)) {
+ struct ieee80211_fast_tx *fast_tx;
+
+ fast_tx = rcu_dereference(sta->fast_tx);
+
+ if (fast_tx &&
+ ieee80211_xmit_fast(sdata, dev, sta, fast_tx, skb))
+ goto out;
}
- skb = ieee80211_build_hdr(sdata, skb, info_flags, sta);
- if (IS_ERR(skb))
- goto out;
+ if (skb_is_gso(skb)) {
+ struct sk_buff *segs;
+
+ segs = skb_gso_segment(skb, 0);
+ if (IS_ERR(segs)) {
+ goto out_free;
+ } else if (segs) {
+ consume_skb(skb);
+ skb = segs;
+ }
+ } else {
+ /* we cannot process non-linear frames on this path */
+ if (skb_linearize(skb)) {
+ kfree_skb(skb);
+ goto out;
+ }
+
+ /* the frame could be fragmented, software-encrypted, and other
+ * things so we cannot really handle checksum offload with it -
+ * fix it up in software before we handle anything else.
+ */
+ if (skb->ip_summed == CHECKSUM_PARTIAL) {
+ skb_set_transport_header(skb,
+ skb_checksum_start_offset(skb));
+ if (skb_checksum_help(skb))
+ goto out_free;
+ }
+ }
+
+ next = skb;
+ while (next) {
+ skb = next;
+ next = skb->next;
- dev->stats.tx_packets++;
- dev->stats.tx_bytes += skb->len;
- dev->trans_start = jiffies;
+ skb->prev = NULL;
+ skb->next = NULL;
+
+ skb = ieee80211_build_hdr(sdata, skb, info_flags, sta);
+ if (IS_ERR(skb))
+ goto out;
- ieee80211_xmit(sdata, sta, skb);
+ ieee80211_tx_stats(dev, skb->len);
+
+ ieee80211_xmit(sdata, sta, skb);
+ }
+ goto out;
+ out_free:
+ kfree_skb(skb);
out:
rcu_read_unlock();
}
diff --git a/net/mac80211/wep.c b/net/mac80211/wep.c
index a4220e92f0cc..efa3f48f1ec5 100644
--- a/net/mac80211/wep.c
+++ b/net/mac80211/wep.c
@@ -98,8 +98,7 @@ static u8 *ieee80211_wep_add_iv(struct ieee80211_local *local,
hdr->frame_control |= cpu_to_le16(IEEE80211_FCTL_PROTECTED);
- if (WARN_ON(skb_tailroom(skb) < IEEE80211_WEP_ICV_LEN ||
- skb_headroom(skb) < IEEE80211_WEP_IV_LEN))
+ if (WARN_ON(skb_headroom(skb) < IEEE80211_WEP_IV_LEN))
return NULL;
hdrlen = ieee80211_hdrlen(hdr->frame_control);
@@ -167,6 +166,9 @@ int ieee80211_wep_encrypt(struct ieee80211_local *local,
size_t len;
u8 rc4key[3 + WLAN_KEY_LEN_WEP104];
+ if (WARN_ON(skb_tailroom(skb) < IEEE80211_WEP_ICV_LEN))
+ return -1;
+
iv = ieee80211_wep_add_iv(local, skb, keylen, keyidx);
if (!iv)
return -1;
diff --git a/net/mac802154/cfg.c b/net/mac802154/cfg.c
index 5d9f68c75e5f..70be9c799f8a 100644
--- a/net/mac802154/cfg.c
+++ b/net/mac802154/cfg.c
@@ -22,13 +22,14 @@
static struct net_device *
ieee802154_add_iface_deprecated(struct wpan_phy *wpan_phy,
- const char *name, int type)
+ const char *name,
+ unsigned char name_assign_type, int type)
{
struct ieee802154_local *local = wpan_phy_priv(wpan_phy);
struct net_device *dev;
rtnl_lock();
- dev = ieee802154_if_add(local, name, type,
+ dev = ieee802154_if_add(local, name, name_assign_type, type,
cpu_to_le64(0x0000000000000000ULL));
rtnl_unlock();
@@ -45,12 +46,14 @@ static void ieee802154_del_iface_deprecated(struct wpan_phy *wpan_phy,
static int
ieee802154_add_iface(struct wpan_phy *phy, const char *name,
+ unsigned char name_assign_type,
enum nl802154_iftype type, __le64 extended_addr)
{
struct ieee802154_local *local = wpan_phy_priv(phy);
struct net_device *err;
- err = ieee802154_if_add(local, name, type, extended_addr);
+ err = ieee802154_if_add(local, name, name_assign_type, type,
+ extended_addr);
return PTR_ERR_OR_ZERO(err);
}
diff --git a/net/mac802154/ieee802154_i.h b/net/mac802154/ieee802154_i.h
index bebd70ffc7a3..127ba18386fc 100644
--- a/net/mac802154/ieee802154_i.h
+++ b/net/mac802154/ieee802154_i.h
@@ -182,7 +182,8 @@ void ieee802154_iface_exit(void);
void ieee802154_if_remove(struct ieee802154_sub_if_data *sdata);
struct net_device *
ieee802154_if_add(struct ieee802154_local *local, const char *name,
- enum nl802154_iftype type, __le64 extended_addr);
+ unsigned char name_assign_type, enum nl802154_iftype type,
+ __le64 extended_addr);
void ieee802154_remove_interfaces(struct ieee802154_local *local);
#endif /* __IEEE802154_I_H */
diff --git a/net/mac802154/iface.c b/net/mac802154/iface.c
index 38b56f9d9386..91b75abbd1a1 100644
--- a/net/mac802154/iface.c
+++ b/net/mac802154/iface.c
@@ -522,7 +522,8 @@ ieee802154_setup_sdata(struct ieee802154_sub_if_data *sdata,
struct net_device *
ieee802154_if_add(struct ieee802154_local *local, const char *name,
- enum nl802154_iftype type, __le64 extended_addr)
+ unsigned char name_assign_type, enum nl802154_iftype type,
+ __le64 extended_addr)
{
struct net_device *ndev = NULL;
struct ieee802154_sub_if_data *sdata = NULL;
@@ -531,7 +532,7 @@ ieee802154_if_add(struct ieee802154_local *local, const char *name,
ASSERT_RTNL();
ndev = alloc_netdev(sizeof(*sdata) + local->hw.vif_data_size, name,
- NET_NAME_UNKNOWN, ieee802154_if_setup);
+ name_assign_type, ieee802154_if_setup);
if (!ndev)
return ERR_PTR(-ENOMEM);
diff --git a/net/mac802154/llsec.c b/net/mac802154/llsec.c
index dcf73958133a..5b2be12832e6 100644
--- a/net/mac802154/llsec.c
+++ b/net/mac802154/llsec.c
@@ -134,7 +134,7 @@ llsec_key_alloc(const struct ieee802154_llsec_key *template)
for (i = 0; i < ARRAY_SIZE(key->tfm); i++) {
key->tfm[i] = crypto_alloc_aead("ccm(aes)", 0,
CRYPTO_ALG_ASYNC);
- if (!key->tfm[i])
+ if (IS_ERR(key->tfm[i]))
goto err_tfm;
if (crypto_aead_setkey(key->tfm[i], template->key,
IEEE802154_LLSEC_KEY_SIZE))
@@ -144,7 +144,7 @@ llsec_key_alloc(const struct ieee802154_llsec_key *template)
}
key->tfm0 = crypto_alloc_blkcipher("ctr(aes)", 0, CRYPTO_ALG_ASYNC);
- if (!key->tfm0)
+ if (IS_ERR(key->tfm0))
goto err_tfm;
if (crypto_blkcipher_setkey(key->tfm0, template->key,
diff --git a/net/mac802154/main.c b/net/mac802154/main.c
index 8500378c8318..08cb32dc8fd3 100644
--- a/net/mac802154/main.c
+++ b/net/mac802154/main.c
@@ -161,18 +161,21 @@ int ieee802154_register_hw(struct ieee802154_hw *hw)
rtnl_lock();
- dev = ieee802154_if_add(local, "wpan%d", NL802154_IFTYPE_NODE,
+ dev = ieee802154_if_add(local, "wpan%d", NET_NAME_ENUM,
+ NL802154_IFTYPE_NODE,
cpu_to_le64(0x0000000000000000ULL));
if (IS_ERR(dev)) {
rtnl_unlock();
rc = PTR_ERR(dev);
- goto out_wq;
+ goto out_phy;
}
rtnl_unlock();
return 0;
+out_phy:
+ wpan_phy_unregister(local->phy);
out_wq:
destroy_workqueue(local->workqueue);
out:
diff --git a/net/mpls/af_mpls.c b/net/mpls/af_mpls.c
index 954810c76a86..7b3f732269e4 100644
--- a/net/mpls/af_mpls.c
+++ b/net/mpls/af_mpls.c
@@ -647,7 +647,7 @@ int nla_get_labels(const struct nlattr *nla,
return -EINVAL;
switch (dec.label) {
- case LABEL_IMPLICIT_NULL:
+ case MPLS_LABEL_IMPLNULL:
/* RFC3032: This is a label that an LSR may
* assign and distribute, but which never
* actually appears in the encapsulation.
@@ -935,7 +935,7 @@ static int resize_platform_label_table(struct net *net, size_t limit)
}
/* In case the predefined labels need to be populated */
- if (limit > LABEL_IPV4_EXPLICIT_NULL) {
+ if (limit > MPLS_LABEL_IPV4NULL) {
struct net_device *lo = net->loopback_dev;
rt0 = mpls_rt_alloc(lo->addr_len);
if (!rt0)
@@ -945,7 +945,7 @@ static int resize_platform_label_table(struct net *net, size_t limit)
rt0->rt_via_table = NEIGH_LINK_TABLE;
memcpy(rt0->rt_via, lo->dev_addr, lo->addr_len);
}
- if (limit > LABEL_IPV6_EXPLICIT_NULL) {
+ if (limit > MPLS_LABEL_IPV6NULL) {
struct net_device *lo = net->loopback_dev;
rt2 = mpls_rt_alloc(lo->addr_len);
if (!rt2)
@@ -973,15 +973,15 @@ static int resize_platform_label_table(struct net *net, size_t limit)
memcpy(labels, old, cp_size);
/* If needed set the predefined labels */
- if ((old_limit <= LABEL_IPV6_EXPLICIT_NULL) &&
- (limit > LABEL_IPV6_EXPLICIT_NULL)) {
- RCU_INIT_POINTER(labels[LABEL_IPV6_EXPLICIT_NULL], rt2);
+ if ((old_limit <= MPLS_LABEL_IPV6NULL) &&
+ (limit > MPLS_LABEL_IPV6NULL)) {
+ RCU_INIT_POINTER(labels[MPLS_LABEL_IPV6NULL], rt2);
rt2 = NULL;
}
- if ((old_limit <= LABEL_IPV4_EXPLICIT_NULL) &&
- (limit > LABEL_IPV4_EXPLICIT_NULL)) {
- RCU_INIT_POINTER(labels[LABEL_IPV4_EXPLICIT_NULL], rt0);
+ if ((old_limit <= MPLS_LABEL_IPV4NULL) &&
+ (limit > MPLS_LABEL_IPV4NULL)) {
+ RCU_INIT_POINTER(labels[MPLS_LABEL_IPV4NULL], rt0);
rt0 = NULL;
}
diff --git a/net/mpls/internal.h b/net/mpls/internal.h
index 693877d69606..b064c345042c 100644
--- a/net/mpls/internal.h
+++ b/net/mpls/internal.h
@@ -1,16 +1,6 @@
#ifndef MPLS_INTERNAL_H
#define MPLS_INTERNAL_H
-#define LABEL_IPV4_EXPLICIT_NULL 0 /* RFC3032 */
-#define LABEL_ROUTER_ALERT_LABEL 1 /* RFC3032 */
-#define LABEL_IPV6_EXPLICIT_NULL 2 /* RFC3032 */
-#define LABEL_IMPLICIT_NULL 3 /* RFC3032 */
-#define LABEL_ENTROPY_INDICATOR 7 /* RFC6790 */
-#define LABEL_GAL 13 /* RFC5586 */
-#define LABEL_OAM_ALERT 14 /* RFC3429 */
-#define LABEL_EXTENSION 15 /* RFC7274 */
-
-
struct mpls_shim_hdr {
__be32 label_stack_entry;
};
diff --git a/net/netfilter/Kconfig b/net/netfilter/Kconfig
index f70e34a68f70..1c78d7fb1da7 100644
--- a/net/netfilter/Kconfig
+++ b/net/netfilter/Kconfig
@@ -1,6 +1,13 @@
menu "Core Netfilter Configuration"
depends on NET && INET && NETFILTER
+config NETFILTER_INGRESS
+ bool "Netfilter ingress support"
+ select NET_INGRESS
+ help
+ This allows you to classify packets from ingress using the Netfilter
+ infrastructure.
+
config NETFILTER_NETLINK
tristate
@@ -863,6 +870,7 @@ config NETFILTER_XT_TARGET_TPROXY
depends on NETFILTER_XTABLES
depends on NETFILTER_ADVANCED
depends on (IPV6 || IPV6=n)
+ depends on (IP6_NF_IPTABLES || IP6_NF_IPTABLES=n)
depends on IP_NF_MANGLE
select NF_DEFRAG_IPV4
select NF_DEFRAG_IPV6 if IP6_NF_IPTABLES
@@ -1356,6 +1364,7 @@ config NETFILTER_XT_MATCH_SOCKET
depends on NETFILTER_ADVANCED
depends on !NF_CONNTRACK || NF_CONNTRACK
depends on (IPV6 || IPV6=n)
+ depends on (IP6_NF_IPTABLES || IP6_NF_IPTABLES=n)
select NF_DEFRAG_IPV4
select NF_DEFRAG_IPV6 if IP6_NF_IPTABLES
help
diff --git a/net/netfilter/core.c b/net/netfilter/core.c
index e6163017c42d..653e32eac08c 100644
--- a/net/netfilter/core.c
+++ b/net/netfilter/core.c
@@ -64,10 +64,27 @@ static DEFINE_MUTEX(nf_hook_mutex);
int nf_register_hook(struct nf_hook_ops *reg)
{
+ struct list_head *nf_hook_list;
struct nf_hook_ops *elem;
mutex_lock(&nf_hook_mutex);
- list_for_each_entry(elem, &nf_hooks[reg->pf][reg->hooknum], list) {
+ switch (reg->pf) {
+ case NFPROTO_NETDEV:
+#ifdef CONFIG_NETFILTER_INGRESS
+ if (reg->hooknum == NF_NETDEV_INGRESS) {
+ BUG_ON(reg->dev == NULL);
+ nf_hook_list = &reg->dev->nf_hooks_ingress;
+ net_inc_ingress_queue();
+ break;
+ }
+#endif
+ /* Fall through. */
+ default:
+ nf_hook_list = &nf_hooks[reg->pf][reg->hooknum];
+ break;
+ }
+
+ list_for_each_entry(elem, nf_hook_list, list) {
if (reg->priority < elem->priority)
break;
}
@@ -85,6 +102,18 @@ void nf_unregister_hook(struct nf_hook_ops *reg)
mutex_lock(&nf_hook_mutex);
list_del_rcu(&reg->list);
mutex_unlock(&nf_hook_mutex);
+ switch (reg->pf) {
+ case NFPROTO_NETDEV:
+#ifdef CONFIG_NETFILTER_INGRESS
+ if (reg->hooknum == NF_NETDEV_INGRESS) {
+ net_dec_ingress_queue();
+ break;
+ }
+ break;
+#endif
+ default:
+ break;
+ }
#ifdef HAVE_JUMP_LABEL
static_key_slow_dec(&nf_hooks_needed[reg->pf][reg->hooknum]);
#endif
@@ -166,11 +195,9 @@ int nf_hook_slow(struct sk_buff *skb, struct nf_hook_state *state)
/* We may already have this, but read-locks nest anyway */
rcu_read_lock();
- elem = list_entry_rcu(&nf_hooks[state->pf][state->hook],
- struct nf_hook_ops, list);
+ elem = list_entry_rcu(state->hook_list, struct nf_hook_ops, list);
next_hook:
- verdict = nf_iterate(&nf_hooks[state->pf][state->hook], skb, state,
- &elem);
+ verdict = nf_iterate(state->hook_list, skb, state, &elem);
if (verdict == NF_ACCEPT || verdict == NF_STOP) {
ret = 1;
} else if ((verdict & NF_VERDICT_MASK) == NF_DROP) {
diff --git a/net/netfilter/ipset/ip_set_bitmap_ip.c b/net/netfilter/ipset/ip_set_bitmap_ip.c
index 55b083ec587a..2fe6de46f6d0 100644
--- a/net/netfilter/ipset/ip_set_bitmap_ip.c
+++ b/net/netfilter/ipset/ip_set_bitmap_ip.c
@@ -36,6 +36,7 @@ IP_SET_MODULE_DESC("bitmap:ip", IPSET_TYPE_REV_MIN, IPSET_TYPE_REV_MAX);
MODULE_ALIAS("ip_set_bitmap:ip");
#define MTYPE bitmap_ip
+#define HOST_MASK 32
/* Type structure */
struct bitmap_ip {
@@ -149,8 +150,11 @@ bitmap_ip_uadt(struct ip_set *set, struct nlattr *tb[],
if (tb[IPSET_ATTR_LINENO])
*lineno = nla_get_u32(tb[IPSET_ATTR_LINENO]);
- ret = ip_set_get_hostipaddr4(tb[IPSET_ATTR_IP], &ip) ||
- ip_set_get_extensions(set, tb, &ext);
+ ret = ip_set_get_hostipaddr4(tb[IPSET_ATTR_IP], &ip);
+ if (ret)
+ return ret;
+
+ ret = ip_set_get_extensions(set, tb, &ext);
if (ret)
return ret;
@@ -174,7 +178,7 @@ bitmap_ip_uadt(struct ip_set *set, struct nlattr *tb[],
} else if (tb[IPSET_ATTR_CIDR]) {
u8 cidr = nla_get_u8(tb[IPSET_ATTR_CIDR]);
- if (!cidr || cidr > 32)
+ if (!cidr || cidr > HOST_MASK)
return -IPSET_ERR_INVALID_CIDR;
ip_set_mask_from_to(ip, ip_to, cidr);
} else
@@ -277,7 +281,7 @@ bitmap_ip_create(struct net *net, struct ip_set *set, struct nlattr *tb[],
} else if (tb[IPSET_ATTR_CIDR]) {
u8 cidr = nla_get_u8(tb[IPSET_ATTR_CIDR]);
- if (cidr >= 32)
+ if (cidr >= HOST_MASK)
return -IPSET_ERR_INVALID_CIDR;
ip_set_mask_from_to(first_ip, last_ip, cidr);
} else
@@ -286,7 +290,7 @@ bitmap_ip_create(struct net *net, struct ip_set *set, struct nlattr *tb[],
if (tb[IPSET_ATTR_NETMASK]) {
netmask = nla_get_u8(tb[IPSET_ATTR_NETMASK]);
- if (netmask > 32)
+ if (netmask > HOST_MASK)
return -IPSET_ERR_INVALID_NETMASK;
first_ip &= ip_set_hostmask(netmask);
@@ -360,7 +364,8 @@ static struct ip_set_type bitmap_ip_type __read_mostly = {
[IPSET_ATTR_LINENO] = { .type = NLA_U32 },
[IPSET_ATTR_BYTES] = { .type = NLA_U64 },
[IPSET_ATTR_PACKETS] = { .type = NLA_U64 },
- [IPSET_ATTR_COMMENT] = { .type = NLA_NUL_STRING },
+ [IPSET_ATTR_COMMENT] = { .type = NLA_NUL_STRING,
+ .len = IPSET_MAX_COMMENT_SIZE },
[IPSET_ATTR_SKBMARK] = { .type = NLA_U64 },
[IPSET_ATTR_SKBPRIO] = { .type = NLA_U32 },
[IPSET_ATTR_SKBQUEUE] = { .type = NLA_U16 },
diff --git a/net/netfilter/ipset/ip_set_bitmap_ipmac.c b/net/netfilter/ipset/ip_set_bitmap_ipmac.c
index 86104744b00f..eb188561d65f 100644
--- a/net/netfilter/ipset/ip_set_bitmap_ipmac.c
+++ b/net/netfilter/ipset/ip_set_bitmap_ipmac.c
@@ -36,6 +36,7 @@ IP_SET_MODULE_DESC("bitmap:ip,mac", IPSET_TYPE_REV_MIN, IPSET_TYPE_REV_MAX);
MODULE_ALIAS("ip_set_bitmap:ip,mac");
#define MTYPE bitmap_ipmac
+#define HOST_MASK 32
#define IP_SET_BITMAP_STORED_TIMEOUT
enum {
@@ -250,8 +251,11 @@ bitmap_ipmac_uadt(struct ip_set *set, struct nlattr *tb[],
if (tb[IPSET_ATTR_LINENO])
*lineno = nla_get_u32(tb[IPSET_ATTR_LINENO]);
- ret = ip_set_get_hostipaddr4(tb[IPSET_ATTR_IP], &ip) ||
- ip_set_get_extensions(set, tb, &ext);
+ ret = ip_set_get_hostipaddr4(tb[IPSET_ATTR_IP], &ip);
+ if (ret)
+ return ret;
+
+ ret = ip_set_get_extensions(set, tb, &ext);
if (ret)
return ret;
@@ -343,7 +347,7 @@ bitmap_ipmac_create(struct net *net, struct ip_set *set, struct nlattr *tb[],
} else if (tb[IPSET_ATTR_CIDR]) {
u8 cidr = nla_get_u8(tb[IPSET_ATTR_CIDR]);
- if (cidr >= 32)
+ if (cidr >= HOST_MASK)
return -IPSET_ERR_INVALID_CIDR;
ip_set_mask_from_to(first_ip, last_ip, cidr);
} else
@@ -397,7 +401,8 @@ static struct ip_set_type bitmap_ipmac_type = {
[IPSET_ATTR_LINENO] = { .type = NLA_U32 },
[IPSET_ATTR_BYTES] = { .type = NLA_U64 },
[IPSET_ATTR_PACKETS] = { .type = NLA_U64 },
- [IPSET_ATTR_COMMENT] = { .type = NLA_NUL_STRING },
+ [IPSET_ATTR_COMMENT] = { .type = NLA_NUL_STRING,
+ .len = IPSET_MAX_COMMENT_SIZE },
[IPSET_ATTR_SKBMARK] = { .type = NLA_U64 },
[IPSET_ATTR_SKBPRIO] = { .type = NLA_U32 },
[IPSET_ATTR_SKBQUEUE] = { .type = NLA_U16 },
diff --git a/net/netfilter/ipset/ip_set_bitmap_port.c b/net/netfilter/ipset/ip_set_bitmap_port.c
index 005dd36444c3..898edb693b3f 100644
--- a/net/netfilter/ipset/ip_set_bitmap_port.c
+++ b/net/netfilter/ipset/ip_set_bitmap_port.c
@@ -294,7 +294,8 @@ static struct ip_set_type bitmap_port_type = {
[IPSET_ATTR_LINENO] = { .type = NLA_U32 },
[IPSET_ATTR_BYTES] = { .type = NLA_U64 },
[IPSET_ATTR_PACKETS] = { .type = NLA_U64 },
- [IPSET_ATTR_COMMENT] = { .type = NLA_NUL_STRING },
+ [IPSET_ATTR_COMMENT] = { .type = NLA_NUL_STRING,
+ .len = IPSET_MAX_COMMENT_SIZE },
[IPSET_ATTR_SKBMARK] = { .type = NLA_U64 },
[IPSET_ATTR_SKBPRIO] = { .type = NLA_U32 },
[IPSET_ATTR_SKBQUEUE] = { .type = NLA_U16 },
diff --git a/net/netfilter/ipset/ip_set_core.c b/net/netfilter/ipset/ip_set_core.c
index d259da3ce67a..475e4960a164 100644
--- a/net/netfilter/ipset/ip_set_core.c
+++ b/net/netfilter/ipset/ip_set_core.c
@@ -42,7 +42,7 @@ static inline struct ip_set_net *ip_set_pernet(struct net *net)
}
#define IP_SET_INC 64
-#define STREQ(a, b) (strncmp(a, b, IPSET_MAXNAMELEN) == 0)
+#define STRNCMP(a, b) (strncmp(a, b, IPSET_MAXNAMELEN) == 0)
static unsigned int max_sets;
@@ -85,7 +85,7 @@ find_set_type(const char *name, u8 family, u8 revision)
struct ip_set_type *type;
list_for_each_entry_rcu(type, &ip_set_type_list, list)
- if (STREQ(type->name, name) &&
+ if (STRNCMP(type->name, name) &&
(type->family == family ||
type->family == NFPROTO_UNSPEC) &&
revision >= type->revision_min &&
@@ -132,7 +132,7 @@ __find_set_type_get(const char *name, u8 family, u8 revision,
/* Make sure the type is already loaded
* but we don't support the revision */
list_for_each_entry_rcu(type, &ip_set_type_list, list)
- if (STREQ(type->name, name)) {
+ if (STRNCMP(type->name, name)) {
err = -IPSET_ERR_FIND_TYPE;
goto unlock;
}
@@ -166,7 +166,7 @@ __find_set_type_minmax(const char *name, u8 family, u8 *min, u8 *max,
*min = 255; *max = 0;
rcu_read_lock();
list_for_each_entry_rcu(type, &ip_set_type_list, list)
- if (STREQ(type->name, name) &&
+ if (STRNCMP(type->name, name) &&
(type->family == family ||
type->family == NFPROTO_UNSPEC)) {
found = true;
@@ -365,7 +365,7 @@ size_t
ip_set_elem_len(struct ip_set *set, struct nlattr *tb[], size_t len)
{
enum ip_set_ext_id id;
- size_t offset = 0;
+ size_t offset = len;
u32 cadt_flags = 0;
if (tb[IPSET_ATTR_CADT_FLAGS])
@@ -375,12 +375,12 @@ ip_set_elem_len(struct ip_set *set, struct nlattr *tb[], size_t len)
for (id = 0; id < IPSET_EXT_ID_MAX; id++) {
if (!add_extension(id, cadt_flags, tb))
continue;
- offset += ALIGN(len + offset, ip_set_extensions[id].align);
+ offset = ALIGN(offset, ip_set_extensions[id].align);
set->offset[id] = offset;
set->extensions |= ip_set_extensions[id].type;
offset += ip_set_extensions[id].len;
}
- return len + offset;
+ return offset;
}
EXPORT_SYMBOL_GPL(ip_set_elem_len);
@@ -432,6 +432,31 @@ ip_set_get_extensions(struct ip_set *set, struct nlattr *tb[],
}
EXPORT_SYMBOL_GPL(ip_set_get_extensions);
+int
+ip_set_put_extensions(struct sk_buff *skb, const struct ip_set *set,
+ const void *e, bool active)
+{
+ if (SET_WITH_TIMEOUT(set)) {
+ unsigned long *timeout = ext_timeout(e, set);
+
+ if (nla_put_net32(skb, IPSET_ATTR_TIMEOUT,
+ htonl(active ? ip_set_timeout_get(timeout)
+ : *timeout)))
+ return -EMSGSIZE;
+ }
+ if (SET_WITH_COUNTER(set) &&
+ ip_set_put_counter(skb, ext_counter(e, set)))
+ return -EMSGSIZE;
+ if (SET_WITH_COMMENT(set) &&
+ ip_set_put_comment(skb, ext_comment(e, set)))
+ return -EMSGSIZE;
+ if (SET_WITH_SKBINFO(set) &&
+ ip_set_put_skbinfo(skb, ext_skbinfo(e, set)))
+ return -EMSGSIZE;
+ return 0;
+}
+EXPORT_SYMBOL_GPL(ip_set_put_extensions);
+
/*
* Creating/destroying/renaming/swapping affect the existence and
* the properties of a set. All of these can be executed from userspace
@@ -581,7 +606,7 @@ ip_set_get_byname(struct net *net, const char *name, struct ip_set **set)
rcu_read_lock();
for (i = 0; i < inst->ip_set_max; i++) {
s = rcu_dereference(inst->ip_set_list)[i];
- if (s != NULL && STREQ(s->name, name)) {
+ if (s != NULL && STRNCMP(s->name, name)) {
__ip_set_get(s);
index = i;
*set = s;
@@ -758,7 +783,7 @@ find_set_and_id(struct ip_set_net *inst, const char *name, ip_set_id_t *id)
*id = IPSET_INVALID_ID;
for (i = 0; i < inst->ip_set_max; i++) {
set = ip_set(inst, i);
- if (set != NULL && STREQ(set->name, name)) {
+ if (set != NULL && STRNCMP(set->name, name)) {
*id = i;
break;
}
@@ -787,7 +812,7 @@ find_free_id(struct ip_set_net *inst, const char *name, ip_set_id_t *index,
if (s == NULL) {
if (*index == IPSET_INVALID_ID)
*index = i;
- } else if (STREQ(name, s->name)) {
+ } else if (STRNCMP(name, s->name)) {
/* Name clash */
*set = s;
return -EEXIST;
@@ -887,7 +912,7 @@ ip_set_create(struct sock *ctnl, struct sk_buff *skb,
if (ret == -EEXIST) {
/* If this is the same set and requested, ignore error */
if ((flags & IPSET_FLAG_EXIST) &&
- STREQ(set->type->name, clash->type->name) &&
+ STRNCMP(set->type->name, clash->type->name) &&
set->type->family == clash->type->family &&
set->type->revision_min == clash->type->revision_min &&
set->type->revision_max == clash->type->revision_max &&
@@ -1098,7 +1123,7 @@ ip_set_rename(struct sock *ctnl, struct sk_buff *skb,
name2 = nla_data(attr[IPSET_ATTR_SETNAME2]);
for (i = 0; i < inst->ip_set_max; i++) {
s = ip_set(inst, i);
- if (s != NULL && STREQ(s->name, name2)) {
+ if (s != NULL && STRNCMP(s->name, name2)) {
ret = -IPSET_ERR_EXIST_SETNAME2;
goto out;
}
diff --git a/net/netfilter/ipset/ip_set_getport.c b/net/netfilter/ipset/ip_set_getport.c
index 29fb01ddff93..1981f021cc60 100644
--- a/net/netfilter/ipset/ip_set_getport.c
+++ b/net/netfilter/ipset/ip_set_getport.c
@@ -98,7 +98,7 @@ ip_set_get_ip4_port(const struct sk_buff *skb, bool src,
__be16 *port, u8 *proto)
{
const struct iphdr *iph = ip_hdr(skb);
- unsigned int protooff = ip_hdrlen(skb);
+ unsigned int protooff = skb_network_offset(skb) + ip_hdrlen(skb);
int protocol = iph->protocol;
/* See comments at tcp_match in ip_tables.c */
@@ -135,7 +135,9 @@ ip_set_get_ip6_port(const struct sk_buff *skb, bool src,
__be16 frag_off = 0;
nexthdr = ipv6_hdr(skb)->nexthdr;
- protoff = ipv6_skip_exthdr(skb, sizeof(struct ipv6hdr), &nexthdr,
+ protoff = ipv6_skip_exthdr(skb,
+ skb_network_offset(skb) +
+ sizeof(struct ipv6hdr), &nexthdr,
&frag_off);
if (protoff < 0 || (frag_off & htons(~0x7)) != 0)
return false;
diff --git a/net/netfilter/ipset/ip_set_hash_gen.h b/net/netfilter/ipset/ip_set_hash_gen.h
index 974ff386db0f..7952869c8023 100644
--- a/net/netfilter/ipset/ip_set_hash_gen.h
+++ b/net/netfilter/ipset/ip_set_hash_gen.h
@@ -180,6 +180,7 @@ hbucket_elem_add(struct hbucket *n, u8 ahash_max, size_t dsize)
#undef mtype_data_equal
#undef mtype_do_data_match
#undef mtype_data_set_flags
+#undef mtype_data_reset_elem
#undef mtype_data_reset_flags
#undef mtype_data_netmask
#undef mtype_data_list
@@ -193,7 +194,6 @@ hbucket_elem_add(struct hbucket *n, u8 ahash_max, size_t dsize)
#undef mtype_ahash_memsize
#undef mtype_flush
#undef mtype_destroy
-#undef mtype_gc_init
#undef mtype_same_set
#undef mtype_kadt
#undef mtype_uadt
@@ -227,6 +227,7 @@ hbucket_elem_add(struct hbucket *n, u8 ahash_max, size_t dsize)
#define mtype_data_list IPSET_TOKEN(MTYPE, _data_list)
#define mtype_data_next IPSET_TOKEN(MTYPE, _data_next)
#define mtype_elem IPSET_TOKEN(MTYPE, _elem)
+
#define mtype_ahash_destroy IPSET_TOKEN(MTYPE, _ahash_destroy)
#define mtype_ext_cleanup IPSET_TOKEN(MTYPE, _ext_cleanup)
#define mtype_add_cidr IPSET_TOKEN(MTYPE, _add_cidr)
@@ -234,7 +235,6 @@ hbucket_elem_add(struct hbucket *n, u8 ahash_max, size_t dsize)
#define mtype_ahash_memsize IPSET_TOKEN(MTYPE, _ahash_memsize)
#define mtype_flush IPSET_TOKEN(MTYPE, _flush)
#define mtype_destroy IPSET_TOKEN(MTYPE, _destroy)
-#define mtype_gc_init IPSET_TOKEN(MTYPE, _gc_init)
#define mtype_same_set IPSET_TOKEN(MTYPE, _same_set)
#define mtype_kadt IPSET_TOKEN(MTYPE, _kadt)
#define mtype_uadt IPSET_TOKEN(MTYPE, _uadt)
@@ -249,9 +249,18 @@ hbucket_elem_add(struct hbucket *n, u8 ahash_max, size_t dsize)
#define mtype_head IPSET_TOKEN(MTYPE, _head)
#define mtype_list IPSET_TOKEN(MTYPE, _list)
#define mtype_gc IPSET_TOKEN(MTYPE, _gc)
+#define mtype_gc_init IPSET_TOKEN(MTYPE, _gc_init)
#define mtype_variant IPSET_TOKEN(MTYPE, _variant)
#define mtype_data_match IPSET_TOKEN(MTYPE, _data_match)
+#ifndef MTYPE
+#error "MTYPE is not defined!"
+#endif
+
+#ifndef HOST_MASK
+#error "HOST_MASK is not defined!"
+#endif
+
#ifndef HKEY_DATALEN
#define HKEY_DATALEN sizeof(struct mtype_elem)
#endif
@@ -261,6 +270,9 @@ hbucket_elem_add(struct hbucket *n, u8 ahash_max, size_t dsize)
& jhash_mask(htable_bits))
#ifndef htype
+#ifndef HTYPE
+#error "HTYPE is not defined!"
+#endif /* HTYPE */
#define htype HTYPE
/* The generic hash structure */
@@ -287,7 +299,7 @@ struct htype {
struct net_prefixes nets[0]; /* book-keeping of prefixes */
#endif
};
-#endif
+#endif /* htype */
#ifdef IP_SET_HASH_WITH_NETS
/* Network cidr size book keeping when the hash stores different
@@ -1045,7 +1057,7 @@ IPSET_TOKEN(HTYPE, _create)(struct net *net, struct ip_set *set,
u8 netmask;
#endif
size_t hsize;
- struct HTYPE *h;
+ struct htype *h;
struct htable *t;
#ifndef IP_SET_PROTO_UNDEF
@@ -1165,3 +1177,5 @@ IPSET_TOKEN(HTYPE, _create)(struct net *net, struct ip_set *set,
return 0;
}
#endif /* IP_SET_EMIT_CREATE */
+
+#undef HKEY_DATALEN
diff --git a/net/netfilter/ipset/ip_set_hash_ip.c b/net/netfilter/ipset/ip_set_hash_ip.c
index 76959d79e9d1..54df48b5c455 100644
--- a/net/netfilter/ipset/ip_set_hash_ip.c
+++ b/net/netfilter/ipset/ip_set_hash_ip.c
@@ -56,15 +56,15 @@ hash_ip4_data_equal(const struct hash_ip4_elem *e1,
return e1->ip == e2->ip;
}
-static inline bool
+static bool
hash_ip4_data_list(struct sk_buff *skb, const struct hash_ip4_elem *e)
{
if (nla_put_ipaddr4(skb, IPSET_ATTR_IP, e->ip))
goto nla_put_failure;
- return 0;
+ return false;
nla_put_failure:
- return 1;
+ return true;
}
static inline void
@@ -74,7 +74,6 @@ hash_ip4_data_next(struct hash_ip4_elem *next, const struct hash_ip4_elem *e)
}
#define MTYPE hash_ip4
-#define PF 4
#define HOST_MASK 32
#include "ip_set_hash_gen.h"
@@ -121,8 +120,11 @@ hash_ip4_uadt(struct ip_set *set, struct nlattr *tb[],
if (tb[IPSET_ATTR_LINENO])
*lineno = nla_get_u32(tb[IPSET_ATTR_LINENO]);
- ret = ip_set_get_hostipaddr4(tb[IPSET_ATTR_IP], &ip) ||
- ip_set_get_extensions(set, tb, &ext);
+ ret = ip_set_get_hostipaddr4(tb[IPSET_ATTR_IP], &ip);
+ if (ret)
+ return ret;
+
+ ret = ip_set_get_extensions(set, tb, &ext);
if (ret)
return ret;
@@ -145,7 +147,7 @@ hash_ip4_uadt(struct ip_set *set, struct nlattr *tb[],
} else if (tb[IPSET_ATTR_CIDR]) {
u8 cidr = nla_get_u8(tb[IPSET_ATTR_CIDR]);
- if (!cidr || cidr > 32)
+ if (!cidr || cidr > HOST_MASK)
return -IPSET_ERR_INVALID_CIDR;
ip_set_mask_from_to(ip, ip_to, cidr);
}
@@ -196,10 +198,10 @@ hash_ip6_data_list(struct sk_buff *skb, const struct hash_ip6_elem *e)
{
if (nla_put_ipaddr6(skb, IPSET_ATTR_IP, &e->ip.in6))
goto nla_put_failure;
- return 0;
+ return false;
nla_put_failure:
- return 1;
+ return true;
}
static inline void
@@ -208,12 +210,9 @@ hash_ip6_data_next(struct hash_ip4_elem *next, const struct hash_ip6_elem *e)
}
#undef MTYPE
-#undef PF
#undef HOST_MASK
-#undef HKEY_DATALEN
#define MTYPE hash_ip6
-#define PF 6
#define HOST_MASK 128
#define IP_SET_EMIT_CREATE
@@ -261,8 +260,11 @@ hash_ip6_uadt(struct ip_set *set, struct nlattr *tb[],
if (tb[IPSET_ATTR_LINENO])
*lineno = nla_get_u32(tb[IPSET_ATTR_LINENO]);
- ret = ip_set_get_ipaddr6(tb[IPSET_ATTR_IP], &e.ip) ||
- ip_set_get_extensions(set, tb, &ext);
+ ret = ip_set_get_ipaddr6(tb[IPSET_ATTR_IP], &e.ip);
+ if (ret)
+ return ret;
+
+ ret = ip_set_get_extensions(set, tb, &ext);
if (ret)
return ret;
@@ -301,7 +303,8 @@ static struct ip_set_type hash_ip_type __read_mostly = {
[IPSET_ATTR_LINENO] = { .type = NLA_U32 },
[IPSET_ATTR_BYTES] = { .type = NLA_U64 },
[IPSET_ATTR_PACKETS] = { .type = NLA_U64 },
- [IPSET_ATTR_COMMENT] = { .type = NLA_NUL_STRING },
+ [IPSET_ATTR_COMMENT] = { .type = NLA_NUL_STRING,
+ .len = IPSET_MAX_COMMENT_SIZE },
[IPSET_ATTR_SKBMARK] = { .type = NLA_U64 },
[IPSET_ATTR_SKBPRIO] = { .type = NLA_U32 },
[IPSET_ATTR_SKBQUEUE] = { .type = NLA_U16 },
diff --git a/net/netfilter/ipset/ip_set_hash_ipmark.c b/net/netfilter/ipset/ip_set_hash_ipmark.c
index 7abf9788cfa8..d231248eb3e2 100644
--- a/net/netfilter/ipset/ip_set_hash_ipmark.c
+++ b/net/netfilter/ipset/ip_set_hash_ipmark.c
@@ -63,10 +63,10 @@ hash_ipmark4_data_list(struct sk_buff *skb,
if (nla_put_ipaddr4(skb, IPSET_ATTR_IP, data->ip) ||
nla_put_net32(skb, IPSET_ATTR_MARK, htonl(data->mark)))
goto nla_put_failure;
- return 0;
+ return false;
nla_put_failure:
- return 1;
+ return true;
}
static inline void
@@ -76,10 +76,8 @@ hash_ipmark4_data_next(struct hash_ipmark4_elem *next,
next->ip = d->ip;
}
-#define MTYPE hash_ipmark4
-#define PF 4
-#define HOST_MASK 32
-#define HKEY_DATALEN sizeof(struct hash_ipmark4_elem)
+#define MTYPE hash_ipmark4
+#define HOST_MASK 32
#include "ip_set_hash_gen.h"
static int
@@ -123,12 +121,15 @@ hash_ipmark4_uadt(struct ip_set *set, struct nlattr *tb[],
if (tb[IPSET_ATTR_LINENO])
*lineno = nla_get_u32(tb[IPSET_ATTR_LINENO]);
- ret = ip_set_get_ipaddr4(tb[IPSET_ATTR_IP], &e.ip) ||
- ip_set_get_extensions(set, tb, &ext);
+ ret = ip_set_get_ipaddr4(tb[IPSET_ATTR_IP], &e.ip);
if (ret)
return ret;
- e.mark = ntohl(nla_get_u32(tb[IPSET_ATTR_MARK]));
+ ret = ip_set_get_extensions(set, tb, &ext);
+ if (ret)
+ return ret;
+
+ e.mark = ntohl(nla_get_be32(tb[IPSET_ATTR_MARK]));
e.mark &= h->markmask;
if (adt == IPSET_TEST ||
@@ -147,7 +148,7 @@ hash_ipmark4_uadt(struct ip_set *set, struct nlattr *tb[],
} else if (tb[IPSET_ATTR_CIDR]) {
u8 cidr = nla_get_u8(tb[IPSET_ATTR_CIDR]);
- if (!cidr || cidr > 32)
+ if (!cidr || cidr > HOST_MASK)
return -IPSET_ERR_INVALID_CIDR;
ip_set_mask_from_to(ip, ip_to, cidr);
}
@@ -191,10 +192,10 @@ hash_ipmark6_data_list(struct sk_buff *skb,
if (nla_put_ipaddr6(skb, IPSET_ATTR_IP, &data->ip.in6) ||
nla_put_net32(skb, IPSET_ATTR_MARK, htonl(data->mark)))
goto nla_put_failure;
- return 0;
+ return false;
nla_put_failure:
- return 1;
+ return true;
}
static inline void
@@ -204,15 +205,11 @@ hash_ipmark6_data_next(struct hash_ipmark4_elem *next,
}
#undef MTYPE
-#undef PF
#undef HOST_MASK
-#undef HKEY_DATALEN
#define MTYPE hash_ipmark6
-#define PF 6
#define HOST_MASK 128
-#define HKEY_DATALEN sizeof(struct hash_ipmark6_elem)
-#define IP_SET_EMIT_CREATE
+#define IP_SET_EMIT_CREATE
#include "ip_set_hash_gen.h"
@@ -258,12 +255,15 @@ hash_ipmark6_uadt(struct ip_set *set, struct nlattr *tb[],
if (tb[IPSET_ATTR_LINENO])
*lineno = nla_get_u32(tb[IPSET_ATTR_LINENO]);
- ret = ip_set_get_ipaddr6(tb[IPSET_ATTR_IP], &e.ip) ||
- ip_set_get_extensions(set, tb, &ext);
+ ret = ip_set_get_ipaddr6(tb[IPSET_ATTR_IP], &e.ip);
+ if (ret)
+ return ret;
+
+ ret = ip_set_get_extensions(set, tb, &ext);
if (ret)
return ret;
- e.mark = ntohl(nla_get_u32(tb[IPSET_ATTR_MARK]));
+ e.mark = ntohl(nla_get_be32(tb[IPSET_ATTR_MARK]));
e.mark &= h->markmask;
if (adt == IPSET_TEST) {
@@ -307,7 +307,8 @@ static struct ip_set_type hash_ipmark_type __read_mostly = {
[IPSET_ATTR_LINENO] = { .type = NLA_U32 },
[IPSET_ATTR_BYTES] = { .type = NLA_U64 },
[IPSET_ATTR_PACKETS] = { .type = NLA_U64 },
- [IPSET_ATTR_COMMENT] = { .type = NLA_NUL_STRING },
+ [IPSET_ATTR_COMMENT] = { .type = NLA_NUL_STRING,
+ .len = IPSET_MAX_COMMENT_SIZE },
[IPSET_ATTR_SKBMARK] = { .type = NLA_U64 },
[IPSET_ATTR_SKBPRIO] = { .type = NLA_U32 },
[IPSET_ATTR_SKBQUEUE] = { .type = NLA_U16 },
diff --git a/net/netfilter/ipset/ip_set_hash_ipport.c b/net/netfilter/ipset/ip_set_hash_ipport.c
index dcbcceb9a52f..a47c29f12090 100644
--- a/net/netfilter/ipset/ip_set_hash_ipport.c
+++ b/net/netfilter/ipset/ip_set_hash_ipport.c
@@ -69,10 +69,10 @@ hash_ipport4_data_list(struct sk_buff *skb,
nla_put_net16(skb, IPSET_ATTR_PORT, data->port) ||
nla_put_u8(skb, IPSET_ATTR_PROTO, data->proto))
goto nla_put_failure;
- return 0;
+ return false;
nla_put_failure:
- return 1;
+ return true;
}
static inline void
@@ -83,10 +83,8 @@ hash_ipport4_data_next(struct hash_ipport4_elem *next,
next->port = d->port;
}
-#define MTYPE hash_ipport4
-#define PF 4
-#define HOST_MASK 32
-#define HKEY_DATALEN sizeof(struct hash_ipport4_elem)
+#define MTYPE hash_ipport4
+#define HOST_MASK 32
#include "ip_set_hash_gen.h"
static int
@@ -132,15 +130,15 @@ hash_ipport4_uadt(struct ip_set *set, struct nlattr *tb[],
if (tb[IPSET_ATTR_LINENO])
*lineno = nla_get_u32(tb[IPSET_ATTR_LINENO]);
- ret = ip_set_get_ipaddr4(tb[IPSET_ATTR_IP], &e.ip) ||
- ip_set_get_extensions(set, tb, &ext);
+ ret = ip_set_get_ipaddr4(tb[IPSET_ATTR_IP], &e.ip);
if (ret)
return ret;
- if (tb[IPSET_ATTR_PORT])
- e.port = nla_get_be16(tb[IPSET_ATTR_PORT]);
- else
- return -IPSET_ERR_PROTOCOL;
+ ret = ip_set_get_extensions(set, tb, &ext);
+ if (ret)
+ return ret;
+
+ e.port = nla_get_be16(tb[IPSET_ATTR_PORT]);
if (tb[IPSET_ATTR_PROTO]) {
e.proto = nla_get_u8(tb[IPSET_ATTR_PROTO]);
@@ -171,7 +169,7 @@ hash_ipport4_uadt(struct ip_set *set, struct nlattr *tb[],
} else if (tb[IPSET_ATTR_CIDR]) {
u8 cidr = nla_get_u8(tb[IPSET_ATTR_CIDR]);
- if (!cidr || cidr > 32)
+ if (!cidr || cidr > HOST_MASK)
return -IPSET_ERR_INVALID_CIDR;
ip_set_mask_from_to(ip, ip_to, cidr);
}
@@ -231,10 +229,10 @@ hash_ipport6_data_list(struct sk_buff *skb,
nla_put_net16(skb, IPSET_ATTR_PORT, data->port) ||
nla_put_u8(skb, IPSET_ATTR_PROTO, data->proto))
goto nla_put_failure;
- return 0;
+ return false;
nla_put_failure:
- return 1;
+ return true;
}
static inline void
@@ -245,15 +243,11 @@ hash_ipport6_data_next(struct hash_ipport4_elem *next,
}
#undef MTYPE
-#undef PF
#undef HOST_MASK
-#undef HKEY_DATALEN
#define MTYPE hash_ipport6
-#define PF 6
#define HOST_MASK 128
-#define HKEY_DATALEN sizeof(struct hash_ipport6_elem)
-#define IP_SET_EMIT_CREATE
+#define IP_SET_EMIT_CREATE
#include "ip_set_hash_gen.h"
static int
@@ -301,15 +295,15 @@ hash_ipport6_uadt(struct ip_set *set, struct nlattr *tb[],
if (tb[IPSET_ATTR_LINENO])
*lineno = nla_get_u32(tb[IPSET_ATTR_LINENO]);
- ret = ip_set_get_ipaddr6(tb[IPSET_ATTR_IP], &e.ip) ||
- ip_set_get_extensions(set, tb, &ext);
+ ret = ip_set_get_ipaddr6(tb[IPSET_ATTR_IP], &e.ip);
if (ret)
return ret;
- if (tb[IPSET_ATTR_PORT])
- e.port = nla_get_be16(tb[IPSET_ATTR_PORT]);
- else
- return -IPSET_ERR_PROTOCOL;
+ ret = ip_set_get_extensions(set, tb, &ext);
+ if (ret)
+ return ret;
+
+ e.port = nla_get_be16(tb[IPSET_ATTR_PORT]);
if (tb[IPSET_ATTR_PROTO]) {
e.proto = nla_get_u8(tb[IPSET_ATTR_PROTO]);
@@ -376,7 +370,8 @@ static struct ip_set_type hash_ipport_type __read_mostly = {
[IPSET_ATTR_LINENO] = { .type = NLA_U32 },
[IPSET_ATTR_BYTES] = { .type = NLA_U64 },
[IPSET_ATTR_PACKETS] = { .type = NLA_U64 },
- [IPSET_ATTR_COMMENT] = { .type = NLA_NUL_STRING },
+ [IPSET_ATTR_COMMENT] = { .type = NLA_NUL_STRING,
+ .len = IPSET_MAX_COMMENT_SIZE },
[IPSET_ATTR_SKBMARK] = { .type = NLA_U64 },
[IPSET_ATTR_SKBPRIO] = { .type = NLA_U32 },
[IPSET_ATTR_SKBQUEUE] = { .type = NLA_U16 },
diff --git a/net/netfilter/ipset/ip_set_hash_ipportip.c b/net/netfilter/ipset/ip_set_hash_ipportip.c
index 7ef93fc887a1..89615f134845 100644
--- a/net/netfilter/ipset/ip_set_hash_ipportip.c
+++ b/net/netfilter/ipset/ip_set_hash_ipportip.c
@@ -70,10 +70,10 @@ hash_ipportip4_data_list(struct sk_buff *skb,
nla_put_net16(skb, IPSET_ATTR_PORT, data->port) ||
nla_put_u8(skb, IPSET_ATTR_PROTO, data->proto))
goto nla_put_failure;
- return 0;
+ return false;
nla_put_failure:
- return 1;
+ return true;
}
static inline void
@@ -86,7 +86,6 @@ hash_ipportip4_data_next(struct hash_ipportip4_elem *next,
/* Common functions */
#define MTYPE hash_ipportip4
-#define PF 4
#define HOST_MASK 32
#include "ip_set_hash_gen.h"
@@ -134,8 +133,11 @@ hash_ipportip4_uadt(struct ip_set *set, struct nlattr *tb[],
if (tb[IPSET_ATTR_LINENO])
*lineno = nla_get_u32(tb[IPSET_ATTR_LINENO]);
- ret = ip_set_get_ipaddr4(tb[IPSET_ATTR_IP], &e.ip) ||
- ip_set_get_extensions(set, tb, &ext);
+ ret = ip_set_get_ipaddr4(tb[IPSET_ATTR_IP], &e.ip);
+ if (ret)
+ return ret;
+
+ ret = ip_set_get_extensions(set, tb, &ext);
if (ret)
return ret;
@@ -143,10 +145,7 @@ hash_ipportip4_uadt(struct ip_set *set, struct nlattr *tb[],
if (ret)
return ret;
- if (tb[IPSET_ATTR_PORT])
- e.port = nla_get_be16(tb[IPSET_ATTR_PORT]);
- else
- return -IPSET_ERR_PROTOCOL;
+ e.port = nla_get_be16(tb[IPSET_ATTR_PORT]);
if (tb[IPSET_ATTR_PROTO]) {
e.proto = nla_get_u8(tb[IPSET_ATTR_PROTO]);
@@ -177,7 +176,7 @@ hash_ipportip4_uadt(struct ip_set *set, struct nlattr *tb[],
} else if (tb[IPSET_ATTR_CIDR]) {
u8 cidr = nla_get_u8(tb[IPSET_ATTR_CIDR]);
- if (!cidr || cidr > 32)
+ if (!cidr || cidr > HOST_MASK)
return -IPSET_ERR_INVALID_CIDR;
ip_set_mask_from_to(ip, ip_to, cidr);
}
@@ -240,10 +239,10 @@ hash_ipportip6_data_list(struct sk_buff *skb,
nla_put_net16(skb, IPSET_ATTR_PORT, data->port) ||
nla_put_u8(skb, IPSET_ATTR_PROTO, data->proto))
goto nla_put_failure;
- return 0;
+ return false;
nla_put_failure:
- return 1;
+ return true;
}
static inline void
@@ -254,11 +253,9 @@ hash_ipportip6_data_next(struct hash_ipportip4_elem *next,
}
#undef MTYPE
-#undef PF
#undef HOST_MASK
#define MTYPE hash_ipportip6
-#define PF 6
#define HOST_MASK 128
#define IP_SET_EMIT_CREATE
#include "ip_set_hash_gen.h"
@@ -309,8 +306,11 @@ hash_ipportip6_uadt(struct ip_set *set, struct nlattr *tb[],
if (tb[IPSET_ATTR_LINENO])
*lineno = nla_get_u32(tb[IPSET_ATTR_LINENO]);
- ret = ip_set_get_ipaddr6(tb[IPSET_ATTR_IP], &e.ip) ||
- ip_set_get_extensions(set, tb, &ext);
+ ret = ip_set_get_ipaddr6(tb[IPSET_ATTR_IP], &e.ip);
+ if (ret)
+ return ret;
+
+ ret = ip_set_get_extensions(set, tb, &ext);
if (ret)
return ret;
@@ -318,10 +318,7 @@ hash_ipportip6_uadt(struct ip_set *set, struct nlattr *tb[],
if (ret)
return ret;
- if (tb[IPSET_ATTR_PORT])
- e.port = nla_get_be16(tb[IPSET_ATTR_PORT]);
- else
- return -IPSET_ERR_PROTOCOL;
+ e.port = nla_get_be16(tb[IPSET_ATTR_PORT]);
if (tb[IPSET_ATTR_PROTO]) {
e.proto = nla_get_u8(tb[IPSET_ATTR_PROTO]);
@@ -388,7 +385,8 @@ static struct ip_set_type hash_ipportip_type __read_mostly = {
[IPSET_ATTR_LINENO] = { .type = NLA_U32 },
[IPSET_ATTR_BYTES] = { .type = NLA_U64 },
[IPSET_ATTR_PACKETS] = { .type = NLA_U64 },
- [IPSET_ATTR_COMMENT] = { .type = NLA_NUL_STRING },
+ [IPSET_ATTR_COMMENT] = { .type = NLA_NUL_STRING,
+ .len = IPSET_MAX_COMMENT_SIZE },
[IPSET_ATTR_SKBMARK] = { .type = NLA_U64 },
[IPSET_ATTR_SKBPRIO] = { .type = NLA_U32 },
[IPSET_ATTR_SKBQUEUE] = { .type = NLA_U16 },
diff --git a/net/netfilter/ipset/ip_set_hash_ipportnet.c b/net/netfilter/ipset/ip_set_hash_ipportnet.c
index b6012ad92781..6ba7a7e083f9 100644
--- a/net/netfilter/ipset/ip_set_hash_ipportnet.c
+++ b/net/netfilter/ipset/ip_set_hash_ipportnet.c
@@ -114,10 +114,10 @@ hash_ipportnet4_data_list(struct sk_buff *skb,
(flags &&
nla_put_net32(skb, IPSET_ATTR_CADT_FLAGS, htonl(flags))))
goto nla_put_failure;
- return 0;
+ return false;
nla_put_failure:
- return 1;
+ return true;
}
static inline void
@@ -130,7 +130,6 @@ hash_ipportnet4_data_next(struct hash_ipportnet4_elem *next,
}
#define MTYPE hash_ipportnet4
-#define PF 4
#define HOST_MASK 32
#include "ip_set_hash_gen.h"
@@ -189,8 +188,11 @@ hash_ipportnet4_uadt(struct ip_set *set, struct nlattr *tb[],
if (tb[IPSET_ATTR_LINENO])
*lineno = nla_get_u32(tb[IPSET_ATTR_LINENO]);
- ret = ip_set_get_hostipaddr4(tb[IPSET_ATTR_IP], &ip) ||
- ip_set_get_extensions(set, tb, &ext);
+ ret = ip_set_get_hostipaddr4(tb[IPSET_ATTR_IP], &ip);
+ if (ret)
+ return ret;
+
+ ret = ip_set_get_extensions(set, tb, &ext);
if (ret)
return ret;
@@ -205,10 +207,7 @@ hash_ipportnet4_uadt(struct ip_set *set, struct nlattr *tb[],
e.cidr = cidr - 1;
}
- if (tb[IPSET_ATTR_PORT])
- e.port = nla_get_be16(tb[IPSET_ATTR_PORT]);
- else
- return -IPSET_ERR_PROTOCOL;
+ e.port = nla_get_be16(tb[IPSET_ATTR_PORT]);
if (tb[IPSET_ATTR_PROTO]) {
e.proto = nla_get_u8(tb[IPSET_ATTR_PROTO]);
@@ -249,7 +248,7 @@ hash_ipportnet4_uadt(struct ip_set *set, struct nlattr *tb[],
} else if (tb[IPSET_ATTR_CIDR]) {
cidr = nla_get_u8(tb[IPSET_ATTR_CIDR]);
- if (!cidr || cidr > 32)
+ if (!cidr || cidr > HOST_MASK)
return -IPSET_ERR_INVALID_CIDR;
ip_set_mask_from_to(ip, ip_to, cidr);
}
@@ -367,10 +366,10 @@ hash_ipportnet6_data_list(struct sk_buff *skb,
(flags &&
nla_put_net32(skb, IPSET_ATTR_CADT_FLAGS, htonl(flags))))
goto nla_put_failure;
- return 0;
+ return false;
nla_put_failure:
- return 1;
+ return true;
}
static inline void
@@ -381,11 +380,9 @@ hash_ipportnet6_data_next(struct hash_ipportnet4_elem *next,
}
#undef MTYPE
-#undef PF
#undef HOST_MASK
#define MTYPE hash_ipportnet6
-#define PF 6
#define HOST_MASK 128
#define IP_SET_EMIT_CREATE
#include "ip_set_hash_gen.h"
@@ -448,8 +445,11 @@ hash_ipportnet6_uadt(struct ip_set *set, struct nlattr *tb[],
if (tb[IPSET_ATTR_LINENO])
*lineno = nla_get_u32(tb[IPSET_ATTR_LINENO]);
- ret = ip_set_get_ipaddr6(tb[IPSET_ATTR_IP], &e.ip) ||
- ip_set_get_extensions(set, tb, &ext);
+ ret = ip_set_get_ipaddr6(tb[IPSET_ATTR_IP], &e.ip);
+ if (ret)
+ return ret;
+
+ ret = ip_set_get_extensions(set, tb, &ext);
if (ret)
return ret;
@@ -466,10 +466,7 @@ hash_ipportnet6_uadt(struct ip_set *set, struct nlattr *tb[],
ip6_netmask(&e.ip2, e.cidr + 1);
- if (tb[IPSET_ATTR_PORT])
- e.port = nla_get_be16(tb[IPSET_ATTR_PORT]);
- else
- return -IPSET_ERR_PROTOCOL;
+ e.port = nla_get_be16(tb[IPSET_ATTR_PORT]);
if (tb[IPSET_ATTR_PROTO]) {
e.proto = nla_get_u8(tb[IPSET_ATTR_PROTO]);
@@ -547,7 +544,8 @@ static struct ip_set_type hash_ipportnet_type __read_mostly = {
[IPSET_ATTR_LINENO] = { .type = NLA_U32 },
[IPSET_ATTR_BYTES] = { .type = NLA_U64 },
[IPSET_ATTR_PACKETS] = { .type = NLA_U64 },
- [IPSET_ATTR_COMMENT] = { .type = NLA_NUL_STRING },
+ [IPSET_ATTR_COMMENT] = { .type = NLA_NUL_STRING,
+ .len = IPSET_MAX_COMMENT_SIZE },
[IPSET_ATTR_SKBMARK] = { .type = NLA_U64 },
[IPSET_ATTR_SKBPRIO] = { .type = NLA_U32 },
[IPSET_ATTR_SKBQUEUE] = { .type = NLA_U16 },
diff --git a/net/netfilter/ipset/ip_set_hash_mac.c b/net/netfilter/ipset/ip_set_hash_mac.c
index 65690b52a4d5..1f8668d7a538 100644
--- a/net/netfilter/ipset/ip_set_hash_mac.c
+++ b/net/netfilter/ipset/ip_set_hash_mac.c
@@ -52,7 +52,12 @@ hash_mac4_data_equal(const struct hash_mac4_elem *e1,
static inline bool
hash_mac4_data_list(struct sk_buff *skb, const struct hash_mac4_elem *e)
{
- return nla_put(skb, IPSET_ATTR_ETHER, ETH_ALEN, e->ether);
+ if (nla_put(skb, IPSET_ATTR_ETHER, ETH_ALEN, e->ether))
+ goto nla_put_failure;
+ return false;
+
+nla_put_failure:
+ return true;
}
static inline void
@@ -62,7 +67,6 @@ hash_mac4_data_next(struct hash_mac4_elem *next,
}
#define MTYPE hash_mac4
-#define PF 4
#define HOST_MASK 32
#define IP_SET_EMIT_CREATE
#define IP_SET_PROTO_UNDEF
@@ -149,7 +153,8 @@ static struct ip_set_type hash_mac_type __read_mostly = {
[IPSET_ATTR_LINENO] = { .type = NLA_U32 },
[IPSET_ATTR_BYTES] = { .type = NLA_U64 },
[IPSET_ATTR_PACKETS] = { .type = NLA_U64 },
- [IPSET_ATTR_COMMENT] = { .type = NLA_NUL_STRING },
+ [IPSET_ATTR_COMMENT] = { .type = NLA_NUL_STRING,
+ .len = IPSET_MAX_COMMENT_SIZE },
[IPSET_ATTR_SKBMARK] = { .type = NLA_U64 },
[IPSET_ATTR_SKBPRIO] = { .type = NLA_U32 },
[IPSET_ATTR_SKBQUEUE] = { .type = NLA_U16 },
diff --git a/net/netfilter/ipset/ip_set_hash_net.c b/net/netfilter/ipset/ip_set_hash_net.c
index 6b3ac10ac2f1..2e63dad8644d 100644
--- a/net/netfilter/ipset/ip_set_hash_net.c
+++ b/net/netfilter/ipset/ip_set_hash_net.c
@@ -95,10 +95,10 @@ hash_net4_data_list(struct sk_buff *skb, const struct hash_net4_elem *data)
(flags &&
nla_put_net32(skb, IPSET_ATTR_CADT_FLAGS, htonl(flags))))
goto nla_put_failure;
- return 0;
+ return false;
nla_put_failure:
- return 1;
+ return true;
}
static inline void
@@ -109,7 +109,6 @@ hash_net4_data_next(struct hash_net4_elem *next,
}
#define MTYPE hash_net4
-#define PF 4
#define HOST_MASK 32
#include "ip_set_hash_gen.h"
@@ -160,8 +159,11 @@ hash_net4_uadt(struct ip_set *set, struct nlattr *tb[],
if (tb[IPSET_ATTR_LINENO])
*lineno = nla_get_u32(tb[IPSET_ATTR_LINENO]);
- ret = ip_set_get_hostipaddr4(tb[IPSET_ATTR_IP], &ip) ||
- ip_set_get_extensions(set, tb, &ext);
+ ret = ip_set_get_hostipaddr4(tb[IPSET_ATTR_IP], &ip);
+ if (ret)
+ return ret;
+
+ ret = ip_set_get_extensions(set, tb, &ext);
if (ret)
return ret;
@@ -264,10 +266,10 @@ hash_net6_data_list(struct sk_buff *skb, const struct hash_net6_elem *data)
(flags &&
nla_put_net32(skb, IPSET_ATTR_CADT_FLAGS, htonl(flags))))
goto nla_put_failure;
- return 0;
+ return false;
nla_put_failure:
- return 1;
+ return true;
}
static inline void
@@ -277,11 +279,9 @@ hash_net6_data_next(struct hash_net4_elem *next,
}
#undef MTYPE
-#undef PF
#undef HOST_MASK
#define MTYPE hash_net6
-#define PF 6
#define HOST_MASK 128
#define IP_SET_EMIT_CREATE
#include "ip_set_hash_gen.h"
@@ -333,8 +333,11 @@ hash_net6_uadt(struct ip_set *set, struct nlattr *tb[],
if (tb[IPSET_ATTR_LINENO])
*lineno = nla_get_u32(tb[IPSET_ATTR_LINENO]);
- ret = ip_set_get_ipaddr6(tb[IPSET_ATTR_IP], &e.ip) ||
- ip_set_get_extensions(set, tb, &ext);
+ ret = ip_set_get_ipaddr6(tb[IPSET_ATTR_IP], &e.ip);
+ if (ret)
+ return ret;
+
+ ret = ip_set_get_extensions(set, tb, &ext);
if (ret)
return ret;
@@ -383,7 +386,8 @@ static struct ip_set_type hash_net_type __read_mostly = {
[IPSET_ATTR_CADT_FLAGS] = { .type = NLA_U32 },
[IPSET_ATTR_BYTES] = { .type = NLA_U64 },
[IPSET_ATTR_PACKETS] = { .type = NLA_U64 },
- [IPSET_ATTR_COMMENT] = { .type = NLA_NUL_STRING },
+ [IPSET_ATTR_COMMENT] = { .type = NLA_NUL_STRING,
+ .len = IPSET_MAX_COMMENT_SIZE },
[IPSET_ATTR_SKBMARK] = { .type = NLA_U64 },
[IPSET_ATTR_SKBPRIO] = { .type = NLA_U32 },
[IPSET_ATTR_SKBQUEUE] = { .type = NLA_U16 },
diff --git a/net/netfilter/ipset/ip_set_hash_netiface.c b/net/netfilter/ipset/ip_set_hash_netiface.c
index 380ef5148ea1..fe481f677f56 100644
--- a/net/netfilter/ipset/ip_set_hash_netiface.c
+++ b/net/netfilter/ipset/ip_set_hash_netiface.c
@@ -193,10 +193,10 @@ hash_netiface4_data_list(struct sk_buff *skb,
(flags &&
nla_put_net32(skb, IPSET_ATTR_CADT_FLAGS, htonl(flags))))
goto nla_put_failure;
- return 0;
+ return false;
nla_put_failure:
- return 1;
+ return true;
}
static inline void
@@ -207,7 +207,6 @@ hash_netiface4_data_next(struct hash_netiface4_elem *next,
}
#define MTYPE hash_netiface4
-#define PF 4
#define HOST_MASK 32
#define HKEY_DATALEN sizeof(struct hash_netiface4_elem_hashed)
#include "ip_set_hash_gen.h"
@@ -308,8 +307,11 @@ hash_netiface4_uadt(struct ip_set *set, struct nlattr *tb[],
if (tb[IPSET_ATTR_LINENO])
*lineno = nla_get_u32(tb[IPSET_ATTR_LINENO]);
- ret = ip_set_get_hostipaddr4(tb[IPSET_ATTR_IP], &ip) ||
- ip_set_get_extensions(set, tb, &ext);
+ ret = ip_set_get_hostipaddr4(tb[IPSET_ATTR_IP], &ip);
+ if (ret)
+ return ret;
+
+ ret = ip_set_get_extensions(set, tb, &ext);
if (ret)
return ret;
@@ -444,10 +446,10 @@ hash_netiface6_data_list(struct sk_buff *skb,
(flags &&
nla_put_net32(skb, IPSET_ATTR_CADT_FLAGS, htonl(flags))))
goto nla_put_failure;
- return 0;
+ return false;
nla_put_failure:
- return 1;
+ return true;
}
static inline void
@@ -457,12 +459,9 @@ hash_netiface6_data_next(struct hash_netiface4_elem *next,
}
#undef MTYPE
-#undef PF
#undef HOST_MASK
-#undef HKEY_DATALEN
#define MTYPE hash_netiface6
-#define PF 6
#define HOST_MASK 128
#define HKEY_DATALEN sizeof(struct hash_netiface6_elem_hashed)
#define IP_SET_EMIT_CREATE
@@ -546,8 +545,11 @@ hash_netiface6_uadt(struct ip_set *set, struct nlattr *tb[],
if (tb[IPSET_ATTR_LINENO])
*lineno = nla_get_u32(tb[IPSET_ATTR_LINENO]);
- ret = ip_set_get_ipaddr6(tb[IPSET_ATTR_IP], &e.ip) ||
- ip_set_get_extensions(set, tb, &ext);
+ ret = ip_set_get_ipaddr6(tb[IPSET_ATTR_IP], &e.ip);
+ if (ret)
+ return ret;
+
+ ret = ip_set_get_extensions(set, tb, &ext);
if (ret)
return ret;
@@ -613,7 +615,8 @@ static struct ip_set_type hash_netiface_type __read_mostly = {
[IPSET_ATTR_LINENO] = { .type = NLA_U32 },
[IPSET_ATTR_BYTES] = { .type = NLA_U64 },
[IPSET_ATTR_PACKETS] = { .type = NLA_U64 },
- [IPSET_ATTR_COMMENT] = { .type = NLA_NUL_STRING },
+ [IPSET_ATTR_COMMENT] = { .type = NLA_NUL_STRING,
+ .len = IPSET_MAX_COMMENT_SIZE },
[IPSET_ATTR_SKBMARK] = { .type = NLA_U64 },
[IPSET_ATTR_SKBPRIO] = { .type = NLA_U32 },
[IPSET_ATTR_SKBQUEUE] = { .type = NLA_U16 },
diff --git a/net/netfilter/ipset/ip_set_hash_netnet.c b/net/netfilter/ipset/ip_set_hash_netnet.c
index ea8772afb6e7..847047483560 100644
--- a/net/netfilter/ipset/ip_set_hash_netnet.c
+++ b/net/netfilter/ipset/ip_set_hash_netnet.c
@@ -128,7 +128,6 @@ hash_netnet4_data_next(struct hash_netnet4_elem *next,
}
#define MTYPE hash_netnet4
-#define PF 4
#define HOST_MASK 32
#include "ip_set_hash_gen.h"
@@ -182,9 +181,15 @@ hash_netnet4_uadt(struct ip_set *set, struct nlattr *tb[],
if (tb[IPSET_ATTR_LINENO])
*lineno = nla_get_u32(tb[IPSET_ATTR_LINENO]);
- ret = ip_set_get_hostipaddr4(tb[IPSET_ATTR_IP], &ip) ||
- ip_set_get_hostipaddr4(tb[IPSET_ATTR_IP2], &ip2_from) ||
- ip_set_get_extensions(set, tb, &ext);
+ ret = ip_set_get_hostipaddr4(tb[IPSET_ATTR_IP], &ip);
+ if (ret)
+ return ret;
+
+ ret = ip_set_get_hostipaddr4(tb[IPSET_ATTR_IP2], &ip2_from);
+ if (ret)
+ return ret;
+
+ ret = ip_set_get_extensions(set, tb, &ext);
if (ret)
return ret;
@@ -354,11 +359,9 @@ hash_netnet6_data_next(struct hash_netnet4_elem *next,
}
#undef MTYPE
-#undef PF
#undef HOST_MASK
#define MTYPE hash_netnet6
-#define PF 6
#define HOST_MASK 128
#define IP_SET_EMIT_CREATE
#include "ip_set_hash_gen.h"
@@ -411,9 +414,15 @@ hash_netnet6_uadt(struct ip_set *set, struct nlattr *tb[],
if (tb[IPSET_ATTR_LINENO])
*lineno = nla_get_u32(tb[IPSET_ATTR_LINENO]);
- ret = ip_set_get_ipaddr6(tb[IPSET_ATTR_IP], &e.ip[0]) ||
- ip_set_get_ipaddr6(tb[IPSET_ATTR_IP2], &e.ip[1]) ||
- ip_set_get_extensions(set, tb, &ext);
+ ret = ip_set_get_ipaddr6(tb[IPSET_ATTR_IP], &e.ip[0]);
+ if (ret)
+ return ret;
+
+ ret = ip_set_get_ipaddr6(tb[IPSET_ATTR_IP2], &e.ip[1]);
+ if (ret)
+ return ret;
+
+ ret = ip_set_get_extensions(set, tb, &ext);
if (ret)
return ret;
@@ -470,7 +479,8 @@ static struct ip_set_type hash_netnet_type __read_mostly = {
[IPSET_ATTR_CADT_FLAGS] = { .type = NLA_U32 },
[IPSET_ATTR_BYTES] = { .type = NLA_U64 },
[IPSET_ATTR_PACKETS] = { .type = NLA_U64 },
- [IPSET_ATTR_COMMENT] = { .type = NLA_NUL_STRING },
+ [IPSET_ATTR_COMMENT] = { .type = NLA_NUL_STRING,
+ .len = IPSET_MAX_COMMENT_SIZE },
[IPSET_ATTR_SKBMARK] = { .type = NLA_U64 },
[IPSET_ATTR_SKBPRIO] = { .type = NLA_U32 },
[IPSET_ATTR_SKBQUEUE] = { .type = NLA_U16 },
diff --git a/net/netfilter/ipset/ip_set_hash_netport.c b/net/netfilter/ipset/ip_set_hash_netport.c
index c0ddb58d19dc..8273819c1a2f 100644
--- a/net/netfilter/ipset/ip_set_hash_netport.c
+++ b/net/netfilter/ipset/ip_set_hash_netport.c
@@ -110,10 +110,10 @@ hash_netport4_data_list(struct sk_buff *skb,
(flags &&
nla_put_net32(skb, IPSET_ATTR_CADT_FLAGS, htonl(flags))))
goto nla_put_failure;
- return 0;
+ return false;
nla_put_failure:
- return 1;
+ return true;
}
static inline void
@@ -125,7 +125,6 @@ hash_netport4_data_next(struct hash_netport4_elem *next,
}
#define MTYPE hash_netport4
-#define PF 4
#define HOST_MASK 32
#include "ip_set_hash_gen.h"
@@ -182,8 +181,11 @@ hash_netport4_uadt(struct ip_set *set, struct nlattr *tb[],
if (tb[IPSET_ATTR_LINENO])
*lineno = nla_get_u32(tb[IPSET_ATTR_LINENO]);
- ret = ip_set_get_hostipaddr4(tb[IPSET_ATTR_IP], &ip) ||
- ip_set_get_extensions(set, tb, &ext);
+ ret = ip_set_get_hostipaddr4(tb[IPSET_ATTR_IP], &ip);
+ if (ret)
+ return ret;
+
+ ret = ip_set_get_extensions(set, tb, &ext);
if (ret)
return ret;
@@ -194,10 +196,7 @@ hash_netport4_uadt(struct ip_set *set, struct nlattr *tb[],
e.cidr = cidr - 1;
}
- if (tb[IPSET_ATTR_PORT])
- e.port = nla_get_be16(tb[IPSET_ATTR_PORT]);
- else
- return -IPSET_ERR_PROTOCOL;
+ e.port = nla_get_be16(tb[IPSET_ATTR_PORT]);
if (tb[IPSET_ATTR_PROTO]) {
e.proto = nla_get_u8(tb[IPSET_ATTR_PROTO]);
@@ -326,10 +325,10 @@ hash_netport6_data_list(struct sk_buff *skb,
(flags &&
nla_put_net32(skb, IPSET_ATTR_CADT_FLAGS, htonl(flags))))
goto nla_put_failure;
- return 0;
+ return false;
nla_put_failure:
- return 1;
+ return true;
}
static inline void
@@ -340,11 +339,9 @@ hash_netport6_data_next(struct hash_netport4_elem *next,
}
#undef MTYPE
-#undef PF
#undef HOST_MASK
#define MTYPE hash_netport6
-#define PF 6
#define HOST_MASK 128
#define IP_SET_EMIT_CREATE
#include "ip_set_hash_gen.h"
@@ -404,8 +401,11 @@ hash_netport6_uadt(struct ip_set *set, struct nlattr *tb[],
if (tb[IPSET_ATTR_LINENO])
*lineno = nla_get_u32(tb[IPSET_ATTR_LINENO]);
- ret = ip_set_get_ipaddr6(tb[IPSET_ATTR_IP], &e.ip) ||
- ip_set_get_extensions(set, tb, &ext);
+ ret = ip_set_get_ipaddr6(tb[IPSET_ATTR_IP], &e.ip);
+ if (ret)
+ return ret;
+
+ ret = ip_set_get_extensions(set, tb, &ext);
if (ret)
return ret;
@@ -417,10 +417,7 @@ hash_netport6_uadt(struct ip_set *set, struct nlattr *tb[],
}
ip6_netmask(&e.ip, e.cidr + 1);
- if (tb[IPSET_ATTR_PORT])
- e.port = nla_get_be16(tb[IPSET_ATTR_PORT]);
- else
- return -IPSET_ERR_PROTOCOL;
+ e.port = nla_get_be16(tb[IPSET_ATTR_PORT]);
if (tb[IPSET_ATTR_PROTO]) {
e.proto = nla_get_u8(tb[IPSET_ATTR_PROTO]);
@@ -495,7 +492,8 @@ static struct ip_set_type hash_netport_type __read_mostly = {
[IPSET_ATTR_CADT_FLAGS] = { .type = NLA_U32 },
[IPSET_ATTR_BYTES] = { .type = NLA_U64 },
[IPSET_ATTR_PACKETS] = { .type = NLA_U64 },
- [IPSET_ATTR_COMMENT] = { .type = NLA_NUL_STRING },
+ [IPSET_ATTR_COMMENT] = { .type = NLA_NUL_STRING,
+ .len = IPSET_MAX_COMMENT_SIZE },
[IPSET_ATTR_SKBMARK] = { .type = NLA_U64 },
[IPSET_ATTR_SKBPRIO] = { .type = NLA_U32 },
[IPSET_ATTR_SKBQUEUE] = { .type = NLA_U16 },
diff --git a/net/netfilter/ipset/ip_set_hash_netportnet.c b/net/netfilter/ipset/ip_set_hash_netportnet.c
index bfaa94c7baa7..1451a8ac938f 100644
--- a/net/netfilter/ipset/ip_set_hash_netportnet.c
+++ b/net/netfilter/ipset/ip_set_hash_netportnet.c
@@ -54,7 +54,7 @@ struct hash_netportnet4_elem {
u16 ccmp;
};
u16 padding;
- u8 nomatch:1;
+ u8 nomatch;
u8 proto;
};
@@ -124,10 +124,10 @@ hash_netportnet4_data_list(struct sk_buff *skb,
(flags &&
nla_put_net32(skb, IPSET_ATTR_CADT_FLAGS, htonl(flags))))
goto nla_put_failure;
- return 0;
+ return false;
nla_put_failure:
- return 1;
+ return true;
}
static inline void
@@ -139,7 +139,6 @@ hash_netportnet4_data_next(struct hash_netportnet4_elem *next,
}
#define MTYPE hash_netportnet4
-#define PF 4
#define HOST_MASK 32
#include "ip_set_hash_gen.h"
@@ -200,9 +199,15 @@ hash_netportnet4_uadt(struct ip_set *set, struct nlattr *tb[],
if (tb[IPSET_ATTR_LINENO])
*lineno = nla_get_u32(tb[IPSET_ATTR_LINENO]);
- ret = ip_set_get_hostipaddr4(tb[IPSET_ATTR_IP], &ip) ||
- ip_set_get_hostipaddr4(tb[IPSET_ATTR_IP2], &ip2_from) ||
- ip_set_get_extensions(set, tb, &ext);
+ ret = ip_set_get_hostipaddr4(tb[IPSET_ATTR_IP], &ip);
+ if (ret)
+ return ret;
+
+ ret = ip_set_get_hostipaddr4(tb[IPSET_ATTR_IP2], &ip2_from);
+ if (ret)
+ return ret;
+
+ ret = ip_set_get_extensions(set, tb, &ext);
if (ret)
return ret;
@@ -220,10 +225,7 @@ hash_netportnet4_uadt(struct ip_set *set, struct nlattr *tb[],
e.cidr[1] = cidr;
}
- if (tb[IPSET_ATTR_PORT])
- e.port = nla_get_be16(tb[IPSET_ATTR_PORT]);
- else
- return -IPSET_ERR_PROTOCOL;
+ e.port = nla_get_be16(tb[IPSET_ATTR_PORT]);
if (tb[IPSET_ATTR_PROTO]) {
e.proto = nla_get_u8(tb[IPSET_ATTR_PROTO]);
@@ -326,7 +328,7 @@ struct hash_netportnet6_elem {
u16 ccmp;
};
u16 padding;
- u8 nomatch:1;
+ u8 nomatch;
u8 proto;
};
@@ -397,10 +399,10 @@ hash_netportnet6_data_list(struct sk_buff *skb,
(flags &&
nla_put_net32(skb, IPSET_ATTR_CADT_FLAGS, htonl(flags))))
goto nla_put_failure;
- return 0;
+ return false;
nla_put_failure:
- return 1;
+ return true;
}
static inline void
@@ -411,11 +413,9 @@ hash_netportnet6_data_next(struct hash_netportnet4_elem *next,
}
#undef MTYPE
-#undef PF
#undef HOST_MASK
#define MTYPE hash_netportnet6
-#define PF 6
#define HOST_MASK 128
#define IP_SET_EMIT_CREATE
#include "ip_set_hash_gen.h"
@@ -477,9 +477,15 @@ hash_netportnet6_uadt(struct ip_set *set, struct nlattr *tb[],
if (tb[IPSET_ATTR_LINENO])
*lineno = nla_get_u32(tb[IPSET_ATTR_LINENO]);
- ret = ip_set_get_ipaddr6(tb[IPSET_ATTR_IP], &e.ip[0]) ||
- ip_set_get_ipaddr6(tb[IPSET_ATTR_IP2], &e.ip[1]) ||
- ip_set_get_extensions(set, tb, &ext);
+ ret = ip_set_get_ipaddr6(tb[IPSET_ATTR_IP], &e.ip[0]);
+ if (ret)
+ return ret;
+
+ ret = ip_set_get_ipaddr6(tb[IPSET_ATTR_IP2], &e.ip[1]);
+ if (ret)
+ return ret;
+
+ ret = ip_set_get_extensions(set, tb, &ext);
if (ret)
return ret;
@@ -496,10 +502,7 @@ hash_netportnet6_uadt(struct ip_set *set, struct nlattr *tb[],
ip6_netmask(&e.ip[0], e.cidr[0]);
ip6_netmask(&e.ip[1], e.cidr[1]);
- if (tb[IPSET_ATTR_PORT])
- e.port = nla_get_be16(tb[IPSET_ATTR_PORT]);
- else
- return -IPSET_ERR_PROTOCOL;
+ e.port = nla_get_be16(tb[IPSET_ATTR_PORT]);
if (tb[IPSET_ATTR_PROTO]) {
e.proto = nla_get_u8(tb[IPSET_ATTR_PROTO]);
@@ -577,7 +580,8 @@ static struct ip_set_type hash_netportnet_type __read_mostly = {
[IPSET_ATTR_LINENO] = { .type = NLA_U32 },
[IPSET_ATTR_BYTES] = { .type = NLA_U64 },
[IPSET_ATTR_PACKETS] = { .type = NLA_U64 },
- [IPSET_ATTR_COMMENT] = { .type = NLA_NUL_STRING },
+ [IPSET_ATTR_COMMENT] = { .type = NLA_NUL_STRING,
+ .len = IPSET_MAX_COMMENT_SIZE },
[IPSET_ATTR_SKBMARK] = { .type = NLA_U64 },
[IPSET_ATTR_SKBPRIO] = { .type = NLA_U32 },
[IPSET_ATTR_SKBQUEUE] = { .type = NLA_U16 },
diff --git a/net/netfilter/ipset/ip_set_list_set.c b/net/netfilter/ipset/ip_set_list_set.c
index f8f682806e36..5bd3b1eae3fa 100644
--- a/net/netfilter/ipset/ip_set_list_set.c
+++ b/net/netfilter/ipset/ip_set_list_set.c
@@ -678,7 +678,8 @@ static struct ip_set_type list_set_type __read_mostly = {
[IPSET_ATTR_CADT_FLAGS] = { .type = NLA_U32 },
[IPSET_ATTR_BYTES] = { .type = NLA_U64 },
[IPSET_ATTR_PACKETS] = { .type = NLA_U64 },
- [IPSET_ATTR_COMMENT] = { .type = NLA_NUL_STRING },
+ [IPSET_ATTR_COMMENT] = { .type = NLA_NUL_STRING,
+ .len = IPSET_MAX_COMMENT_SIZE },
[IPSET_ATTR_SKBMARK] = { .type = NLA_U64 },
[IPSET_ATTR_SKBPRIO] = { .type = NLA_U32 },
[IPSET_ATTR_SKBQUEUE] = { .type = NLA_U16 },
diff --git a/net/netfilter/ipvs/ip_vs_ctl.c b/net/netfilter/ipvs/ip_vs_ctl.c
index 49532672f66d..285eae3a1454 100644
--- a/net/netfilter/ipvs/ip_vs_ctl.c
+++ b/net/netfilter/ipvs/ip_vs_ctl.c
@@ -3823,6 +3823,9 @@ static void __net_exit ip_vs_control_net_cleanup_sysctl(struct net *net)
cancel_work_sync(&ipvs->defense_work.work);
unregister_net_sysctl_table(ipvs->sysctl_hdr);
ip_vs_stop_estimator(net, &ipvs->tot_stats);
+
+ if (!net_eq(net, &init_net))
+ kfree(ipvs->sysctl_tbl);
}
#else
diff --git a/net/netfilter/ipvs/ip_vs_sync.c b/net/netfilter/ipvs/ip_vs_sync.c
index 19b9cce6c210..b08ba9538d12 100644
--- a/net/netfilter/ipvs/ip_vs_sync.c
+++ b/net/netfilter/ipvs/ip_vs_sync.c
@@ -1457,18 +1457,12 @@ static struct socket *make_send_sock(struct net *net, int id)
struct socket *sock;
int result;
- /* First create a socket move it to right name space later */
- result = sock_create_kern(PF_INET, SOCK_DGRAM, IPPROTO_UDP, &sock);
+ /* First create a socket */
+ result = sock_create_kern(net, PF_INET, SOCK_DGRAM, IPPROTO_UDP, &sock);
if (result < 0) {
pr_err("Error during creation of socket; terminating\n");
return ERR_PTR(result);
}
- /*
- * Kernel sockets that are a part of a namespace, should not
- * hold a reference to a namespace in order to allow to stop it.
- * After sk_change_net should be released using sk_release_kernel.
- */
- sk_change_net(sock->sk, net);
result = set_mcast_if(sock->sk, ipvs->master_mcast_ifn);
if (result < 0) {
pr_err("Error setting outbound mcast interface\n");
@@ -1497,7 +1491,7 @@ static struct socket *make_send_sock(struct net *net, int id)
return sock;
error:
- sk_release_kernel(sock->sk);
+ sock_release(sock);
return ERR_PTR(result);
}
@@ -1518,17 +1512,11 @@ static struct socket *make_receive_sock(struct net *net, int id)
int result;
/* First create a socket */
- result = sock_create_kern(PF_INET, SOCK_DGRAM, IPPROTO_UDP, &sock);
+ result = sock_create_kern(net, PF_INET, SOCK_DGRAM, IPPROTO_UDP, &sock);
if (result < 0) {
pr_err("Error during creation of socket; terminating\n");
return ERR_PTR(result);
}
- /*
- * Kernel sockets that are a part of a namespace, should not
- * hold a reference to a namespace in order to allow to stop it.
- * After sk_change_net should be released using sk_release_kernel.
- */
- sk_change_net(sock->sk, net);
/* it is equivalent to the REUSEADDR option in user-space */
sock->sk->sk_reuse = SK_CAN_REUSE;
result = sysctl_sync_sock_size(ipvs);
@@ -1554,7 +1542,7 @@ static struct socket *make_receive_sock(struct net *net, int id)
return sock;
error:
- sk_release_kernel(sock->sk);
+ sock_release(sock);
return ERR_PTR(result);
}
@@ -1692,7 +1680,7 @@ done:
ip_vs_sync_buff_release(sb);
/* release the sending multicast socket */
- sk_release_kernel(tinfo->sock->sk);
+ sock_release(tinfo->sock);
kfree(tinfo);
return 0;
@@ -1729,7 +1717,7 @@ static int sync_thread_backup(void *data)
}
/* release the sending multicast socket */
- sk_release_kernel(tinfo->sock->sk);
+ sock_release(tinfo->sock);
kfree(tinfo->buf);
kfree(tinfo);
@@ -1854,11 +1842,11 @@ int start_sync_thread(struct net *net, int state, char *mcast_ifn, __u8 syncid)
return 0;
outsocket:
- sk_release_kernel(sock->sk);
+ sock_release(sock);
outtinfo:
if (tinfo) {
- sk_release_kernel(tinfo->sock->sk);
+ sock_release(tinfo->sock);
kfree(tinfo->buf);
kfree(tinfo);
}
diff --git a/net/netfilter/nf_conntrack_proto_tcp.c b/net/netfilter/nf_conntrack_proto_tcp.c
index 5caa0c41bf26..70383de72054 100644
--- a/net/netfilter/nf_conntrack_proto_tcp.c
+++ b/net/netfilter/nf_conntrack_proto_tcp.c
@@ -202,7 +202,7 @@ static const u8 tcp_conntracks[2][6][TCP_CONNTRACK_MAX] = {
* sES -> sES :-)
* sFW -> sCW Normal close request answered by ACK.
* sCW -> sCW
- * sLA -> sTW Last ACK detected.
+ * sLA -> sTW Last ACK detected (RFC5961 challenged)
* sTW -> sTW Retransmitted last ACK. Remain in the same state.
* sCL -> sCL
*/
@@ -261,7 +261,7 @@ static const u8 tcp_conntracks[2][6][TCP_CONNTRACK_MAX] = {
* sES -> sES :-)
* sFW -> sCW Normal close request answered by ACK.
* sCW -> sCW
- * sLA -> sTW Last ACK detected.
+ * sLA -> sTW Last ACK detected (RFC5961 challenged)
* sTW -> sTW Retransmitted last ACK.
* sCL -> sCL
*/
@@ -906,6 +906,7 @@ static int tcp_packet(struct nf_conn *ct,
1 : ct->proto.tcp.last_win;
ct->proto.tcp.seen[ct->proto.tcp.last_dir].td_scale =
ct->proto.tcp.last_wscale;
+ ct->proto.tcp.last_flags &= ~IP_CT_EXP_CHALLENGE_ACK;
ct->proto.tcp.seen[ct->proto.tcp.last_dir].flags =
ct->proto.tcp.last_flags;
memset(&ct->proto.tcp.seen[dir], 0,
@@ -923,7 +924,9 @@ static int tcp_packet(struct nf_conn *ct,
* may be in sync but we are not. In that case, we annotate
* the TCP options and let the packet go through. If it is a
* valid SYN packet, the server will reply with a SYN/ACK, and
- * then we'll get in sync. Otherwise, the server ignores it. */
+ * then we'll get in sync. Otherwise, the server potentially
+ * responds with a challenge ACK if implementing RFC5961.
+ */
if (index == TCP_SYN_SET && dir == IP_CT_DIR_ORIGINAL) {
struct ip_ct_tcp_state seen = {};
@@ -939,6 +942,13 @@ static int tcp_packet(struct nf_conn *ct,
ct->proto.tcp.last_flags |=
IP_CT_TCP_FLAG_SACK_PERM;
}
+ /* Mark the potential for RFC5961 challenge ACK,
+ * this pose a special problem for LAST_ACK state
+ * as ACK is intrepretated as ACKing last FIN.
+ */
+ if (old_state == TCP_CONNTRACK_LAST_ACK)
+ ct->proto.tcp.last_flags |=
+ IP_CT_EXP_CHALLENGE_ACK;
}
spin_unlock_bh(&ct->lock);
if (LOG_INVALID(net, IPPROTO_TCP))
@@ -970,6 +980,25 @@ static int tcp_packet(struct nf_conn *ct,
nf_log_packet(net, pf, 0, skb, NULL, NULL, NULL,
"nf_ct_tcp: invalid state ");
return -NF_ACCEPT;
+ case TCP_CONNTRACK_TIME_WAIT:
+ /* RFC5961 compliance cause stack to send "challenge-ACK"
+ * e.g. in response to spurious SYNs. Conntrack MUST
+ * not believe this ACK is acking last FIN.
+ */
+ if (old_state == TCP_CONNTRACK_LAST_ACK &&
+ index == TCP_ACK_SET &&
+ ct->proto.tcp.last_dir != dir &&
+ ct->proto.tcp.last_index == TCP_SYN_SET &&
+ (ct->proto.tcp.last_flags & IP_CT_EXP_CHALLENGE_ACK)) {
+ /* Detected RFC5961 challenge ACK */
+ ct->proto.tcp.last_flags &= ~IP_CT_EXP_CHALLENGE_ACK;
+ spin_unlock_bh(&ct->lock);
+ if (LOG_INVALID(net, IPPROTO_TCP))
+ nf_log_packet(net, pf, 0, skb, NULL, NULL, NULL,
+ "nf_ct_tcp: challenge-ACK ignored ");
+ return NF_ACCEPT; /* Don't change state */
+ }
+ break;
case TCP_CONNTRACK_CLOSE:
if (index == TCP_RST_SET
&& (ct->proto.tcp.seen[!dir].flags & IP_CT_TCP_FLAG_MAXACK_SET)
diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c
index 78af83bc9c8e..34ded09317e7 100644
--- a/net/netfilter/nf_tables_api.c
+++ b/net/netfilter/nf_tables_api.c
@@ -4340,7 +4340,6 @@ static int nft_verdict_init(const struct nft_ctx *ctx, struct nft_data *data,
case NFT_CONTINUE:
case NFT_BREAK:
case NFT_RETURN:
- desc->len = sizeof(data->verdict);
break;
case NFT_JUMP:
case NFT_GOTO:
@@ -4355,10 +4354,10 @@ static int nft_verdict_init(const struct nft_ctx *ctx, struct nft_data *data,
chain->use++;
data->verdict.chain = chain;
- desc->len = sizeof(data);
break;
}
+ desc->len = sizeof(data->verdict);
desc->type = NFT_DATA_VERDICT;
return 0;
}
@@ -4473,9 +4472,9 @@ EXPORT_SYMBOL_GPL(nft_data_init);
*/
void nft_data_uninit(const struct nft_data *data, enum nft_data_types type)
{
- switch (type) {
- case NFT_DATA_VALUE:
+ if (type < NFT_DATA_VERDICT)
return;
+ switch (type) {
case NFT_DATA_VERDICT:
return nft_verdict_uninit(data);
default:
diff --git a/net/netfilter/nfnetlink_log.c b/net/netfilter/nfnetlink_log.c
index 3ad91266c821..4ef1fae8445e 100644
--- a/net/netfilter/nfnetlink_log.c
+++ b/net/netfilter/nfnetlink_log.c
@@ -1073,7 +1073,13 @@ static struct pernet_operations nfnl_log_net_ops = {
static int __init nfnetlink_log_init(void)
{
- int status = -ENOMEM;
+ int status;
+
+ status = register_pernet_subsys(&nfnl_log_net_ops);
+ if (status < 0) {
+ pr_err("failed to register pernet ops\n");
+ goto out;
+ }
netlink_register_notifier(&nfulnl_rtnl_notifier);
status = nfnetlink_subsys_register(&nfulnl_subsys);
@@ -1088,28 +1094,23 @@ static int __init nfnetlink_log_init(void)
goto cleanup_subsys;
}
- status = register_pernet_subsys(&nfnl_log_net_ops);
- if (status < 0) {
- pr_err("failed to register pernet ops\n");
- goto cleanup_logger;
- }
return status;
-cleanup_logger:
- nf_log_unregister(&nfulnl_logger);
cleanup_subsys:
nfnetlink_subsys_unregister(&nfulnl_subsys);
cleanup_netlink_notifier:
netlink_unregister_notifier(&nfulnl_rtnl_notifier);
+ unregister_pernet_subsys(&nfnl_log_net_ops);
+out:
return status;
}
static void __exit nfnetlink_log_fini(void)
{
- unregister_pernet_subsys(&nfnl_log_net_ops);
nf_log_unregister(&nfulnl_logger);
nfnetlink_subsys_unregister(&nfulnl_subsys);
netlink_unregister_notifier(&nfulnl_rtnl_notifier);
+ unregister_pernet_subsys(&nfnl_log_net_ops);
}
MODULE_DESCRIPTION("netfilter userspace logging");
diff --git a/net/netfilter/nfnetlink_queue_core.c b/net/netfilter/nfnetlink_queue_core.c
index 0b98c7420239..22a5ac76683e 100644
--- a/net/netfilter/nfnetlink_queue_core.c
+++ b/net/netfilter/nfnetlink_queue_core.c
@@ -1257,7 +1257,7 @@ static int seq_show(struct seq_file *s, void *v)
inst->copy_mode, inst->copy_range,
inst->queue_dropped, inst->queue_user_dropped,
inst->id_sequence, 1);
- return seq_has_overflowed(s);
+ return 0;
}
static const struct seq_operations nfqnl_seq_ops = {
@@ -1317,7 +1317,13 @@ static struct pernet_operations nfnl_queue_net_ops = {
static int __init nfnetlink_queue_init(void)
{
- int status = -ENOMEM;
+ int status;
+
+ status = register_pernet_subsys(&nfnl_queue_net_ops);
+ if (status < 0) {
+ pr_err("nf_queue: failed to register pernet ops\n");
+ goto out;
+ }
netlink_register_notifier(&nfqnl_rtnl_notifier);
status = nfnetlink_subsys_register(&nfqnl_subsys);
@@ -1326,19 +1332,13 @@ static int __init nfnetlink_queue_init(void)
goto cleanup_netlink_notifier;
}
- status = register_pernet_subsys(&nfnl_queue_net_ops);
- if (status < 0) {
- pr_err("nf_queue: failed to register pernet ops\n");
- goto cleanup_subsys;
- }
register_netdevice_notifier(&nfqnl_dev_notifier);
nf_register_queue_handler(&nfqh);
return status;
-cleanup_subsys:
- nfnetlink_subsys_unregister(&nfqnl_subsys);
cleanup_netlink_notifier:
netlink_unregister_notifier(&nfqnl_rtnl_notifier);
+out:
return status;
}
@@ -1346,9 +1346,9 @@ static void __exit nfnetlink_queue_fini(void)
{
nf_unregister_queue_handler();
unregister_netdevice_notifier(&nfqnl_dev_notifier);
- unregister_pernet_subsys(&nfnl_queue_net_ops);
nfnetlink_subsys_unregister(&nfqnl_subsys);
netlink_unregister_notifier(&nfqnl_rtnl_notifier);
+ unregister_pernet_subsys(&nfnl_queue_net_ops);
rcu_barrier(); /* Wait for completion of call_rcu()'s */
}
diff --git a/net/netfilter/nft_compat.c b/net/netfilter/nft_compat.c
index 7f29cfc76349..66def315eb56 100644
--- a/net/netfilter/nft_compat.c
+++ b/net/netfilter/nft_compat.c
@@ -161,6 +161,7 @@ nft_target_set_tgchk_param(struct xt_tgchk_param *par,
par->hook_mask = 0;
}
par->family = ctx->afi->family;
+ par->nft_compat = true;
}
static void target_compat_from_user(struct xt_target *t, void *in, void *out)
@@ -377,6 +378,7 @@ nft_match_set_mtchk_param(struct xt_mtchk_param *par, const struct nft_ctx *ctx,
par->hook_mask = 0;
}
par->family = ctx->afi->family;
+ par->nft_compat = true;
}
static void match_compat_from_user(struct xt_match *m, void *in, void *out)
diff --git a/net/netfilter/x_tables.c b/net/netfilter/x_tables.c
index 51a459c3c649..83032464a4bd 100644
--- a/net/netfilter/x_tables.c
+++ b/net/netfilter/x_tables.c
@@ -947,11 +947,9 @@ static int xt_table_seq_show(struct seq_file *seq, void *v)
{
struct xt_table *table = list_entry(v, struct xt_table, list);
- if (strlen(table->name)) {
+ if (*table->name)
seq_printf(seq, "%s\n", table->name);
- return seq_has_overflowed(seq);
- } else
- return 0;
+ return 0;
}
static const struct seq_operations xt_table_seq_ops = {
@@ -1087,10 +1085,8 @@ static int xt_match_seq_show(struct seq_file *seq, void *v)
if (trav->curr == trav->head)
return 0;
match = list_entry(trav->curr, struct xt_match, list);
- if (*match->name == '\0')
- return 0;
- seq_printf(seq, "%s\n", match->name);
- return seq_has_overflowed(seq);
+ if (*match->name)
+ seq_printf(seq, "%s\n", match->name);
}
return 0;
}
@@ -1142,10 +1138,8 @@ static int xt_target_seq_show(struct seq_file *seq, void *v)
if (trav->curr == trav->head)
return 0;
target = list_entry(trav->curr, struct xt_target, list);
- if (*target->name == '\0')
- return 0;
- seq_printf(seq, "%s\n", target->name);
- return seq_has_overflowed(seq);
+ if (*target->name)
+ seq_printf(seq, "%s\n", target->name);
}
return 0;
}
diff --git a/net/netfilter/xt_TCPMSS.c b/net/netfilter/xt_TCPMSS.c
index e762de5ee89b..8c3190e2fc6a 100644
--- a/net/netfilter/xt_TCPMSS.c
+++ b/net/netfilter/xt_TCPMSS.c
@@ -277,6 +277,9 @@ static int tcpmss_tg4_check(const struct xt_tgchk_param *par)
"FORWARD, OUTPUT and POSTROUTING hooks\n");
return -EINVAL;
}
+ if (par->nft_compat)
+ return 0;
+
xt_ematch_foreach(ematch, e)
if (find_syn_match(ematch))
return 0;
@@ -299,6 +302,9 @@ static int tcpmss_tg6_check(const struct xt_tgchk_param *par)
"FORWARD, OUTPUT and POSTROUTING hooks\n");
return -EINVAL;
}
+ if (par->nft_compat)
+ return 0;
+
xt_ematch_foreach(ematch, e)
if (find_syn_match(ematch))
return 0;
diff --git a/net/netfilter/xt_mark.c b/net/netfilter/xt_mark.c
index 23345238711b..ebd41dc501e5 100644
--- a/net/netfilter/xt_mark.c
+++ b/net/netfilter/xt_mark.c
@@ -23,6 +23,7 @@ MODULE_ALIAS("ipt_mark");
MODULE_ALIAS("ip6t_mark");
MODULE_ALIAS("ipt_MARK");
MODULE_ALIAS("ip6t_MARK");
+MODULE_ALIAS("arpt_MARK");
static unsigned int
mark_tg(struct sk_buff *skb, const struct xt_action_param *par)
diff --git a/net/netfilter/xt_set.c b/net/netfilter/xt_set.c
index 89045982ec94..b103e9627716 100644
--- a/net/netfilter/xt_set.c
+++ b/net/netfilter/xt_set.c
@@ -15,8 +15,9 @@
#include <linux/skbuff.h>
#include <linux/netfilter/x_tables.h>
-#include <linux/netfilter/xt_set.h>
+#include <linux/netfilter/ipset/ip_set.h>
#include <linux/netfilter/ipset/ip_set_timeout.h>
+#include <uapi/linux/netfilter/xt_set.h>
MODULE_LICENSE("GPL");
MODULE_AUTHOR("Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>");
diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c
index ec4adbdcb9b4..69d67c300b80 100644
--- a/net/netlink/af_netlink.c
+++ b/net/netlink/af_netlink.c
@@ -76,20 +76,21 @@ struct listeners {
};
/* state bits */
-#define NETLINK_CONGESTED 0x0
+#define NETLINK_S_CONGESTED 0x0
/* flags */
-#define NETLINK_KERNEL_SOCKET 0x1
-#define NETLINK_RECV_PKTINFO 0x2
-#define NETLINK_BROADCAST_SEND_ERROR 0x4
-#define NETLINK_RECV_NO_ENOBUFS 0x8
+#define NETLINK_F_KERNEL_SOCKET 0x1
+#define NETLINK_F_RECV_PKTINFO 0x2
+#define NETLINK_F_BROADCAST_SEND_ERROR 0x4
+#define NETLINK_F_RECV_NO_ENOBUFS 0x8
+#define NETLINK_F_LISTEN_ALL_NSID 0x10
static inline int netlink_is_kernel(struct sock *sk)
{
- return nlk_sk(sk)->flags & NETLINK_KERNEL_SOCKET;
+ return nlk_sk(sk)->flags & NETLINK_F_KERNEL_SOCKET;
}
-struct netlink_table *nl_table;
+struct netlink_table *nl_table __read_mostly;
EXPORT_SYMBOL_GPL(nl_table);
static DECLARE_WAIT_QUEUE_HEAD(nl_table_wait);
@@ -256,8 +257,9 @@ static void netlink_overrun(struct sock *sk)
{
struct netlink_sock *nlk = nlk_sk(sk);
- if (!(nlk->flags & NETLINK_RECV_NO_ENOBUFS)) {
- if (!test_and_set_bit(NETLINK_CONGESTED, &nlk_sk(sk)->state)) {
+ if (!(nlk->flags & NETLINK_F_RECV_NO_ENOBUFS)) {
+ if (!test_and_set_bit(NETLINK_S_CONGESTED,
+ &nlk_sk(sk)->state)) {
sk->sk_err = ENOBUFS;
sk->sk_error_report(sk);
}
@@ -270,8 +272,8 @@ static void netlink_rcv_wake(struct sock *sk)
struct netlink_sock *nlk = nlk_sk(sk);
if (skb_queue_empty(&sk->sk_receive_queue))
- clear_bit(NETLINK_CONGESTED, &nlk->state);
- if (!test_bit(NETLINK_CONGESTED, &nlk->state))
+ clear_bit(NETLINK_S_CONGESTED, &nlk->state);
+ if (!test_bit(NETLINK_S_CONGESTED, &nlk->state))
wake_up_interruptible(&nlk->wait);
}
@@ -1081,6 +1083,7 @@ static int netlink_insert(struct sock *sk, u32 portid)
if (err) {
if (err == -EEXIST)
err = -EADDRINUSE;
+ nlk_sk(sk)->portid = 0;
sock_put(sk);
}
@@ -1117,14 +1120,15 @@ static struct proto netlink_proto = {
};
static int __netlink_create(struct net *net, struct socket *sock,
- struct mutex *cb_mutex, int protocol)
+ struct mutex *cb_mutex, int protocol,
+ int kern)
{
struct sock *sk;
struct netlink_sock *nlk;
sock->ops = &netlink_ops;
- sk = sk_alloc(net, PF_NETLINK, GFP_KERNEL, &netlink_proto);
+ sk = sk_alloc(net, PF_NETLINK, GFP_KERNEL, &netlink_proto, kern);
if (!sk)
return -ENOMEM;
@@ -1186,7 +1190,7 @@ static int netlink_create(struct net *net, struct socket *sock, int protocol,
if (err < 0)
goto out;
- err = __netlink_create(net, sock, cb_mutex, protocol);
+ err = __netlink_create(net, sock, cb_mutex, protocol, kern);
if (err < 0)
goto out_module;
@@ -1296,20 +1300,24 @@ static int netlink_autobind(struct socket *sock)
struct netlink_table *table = &nl_table[sk->sk_protocol];
s32 portid = task_tgid_vnr(current);
int err;
- static s32 rover = -4097;
+ s32 rover = -4096;
+ bool ok;
retry:
cond_resched();
rcu_read_lock();
- if (__netlink_lookup(table, portid, net)) {
+ ok = !__netlink_lookup(table, portid, net);
+ rcu_read_unlock();
+ if (!ok) {
/* Bind collision, search negative portid values. */
- portid = rover--;
- if (rover > -4097)
+ if (rover == -4096)
+ /* rover will be in range [S32_MIN, -4097] */
+ rover = S32_MIN + prandom_u32_max(-4096 - S32_MIN);
+ else if (rover >= -4096)
rover = -4097;
- rcu_read_unlock();
+ portid = rover--;
goto retry;
}
- rcu_read_unlock();
err = netlink_insert(sk, portid);
if (err == -EADDRINUSE)
@@ -1656,7 +1664,7 @@ int netlink_attachskb(struct sock *sk, struct sk_buff *skb,
nlk = nlk_sk(sk);
if ((atomic_read(&sk->sk_rmem_alloc) > sk->sk_rcvbuf ||
- test_bit(NETLINK_CONGESTED, &nlk->state)) &&
+ test_bit(NETLINK_S_CONGESTED, &nlk->state)) &&
!netlink_skb_is_mmaped(skb)) {
DECLARE_WAITQUEUE(wait, current);
if (!*timeo) {
@@ -1671,7 +1679,7 @@ int netlink_attachskb(struct sock *sk, struct sk_buff *skb,
add_wait_queue(&nlk->wait, &wait);
if ((atomic_read(&sk->sk_rmem_alloc) > sk->sk_rcvbuf ||
- test_bit(NETLINK_CONGESTED, &nlk->state)) &&
+ test_bit(NETLINK_S_CONGESTED, &nlk->state)) &&
!sock_flag(sk, SOCK_DEAD))
*timeo = schedule_timeout(*timeo);
@@ -1895,7 +1903,7 @@ static int netlink_broadcast_deliver(struct sock *sk, struct sk_buff *skb)
struct netlink_sock *nlk = nlk_sk(sk);
if (atomic_read(&sk->sk_rmem_alloc) <= sk->sk_rcvbuf &&
- !test_bit(NETLINK_CONGESTED, &nlk->state)) {
+ !test_bit(NETLINK_S_CONGESTED, &nlk->state)) {
netlink_skb_set_owner_r(skb, sk);
__netlink_sendskb(sk, skb);
return atomic_read(&sk->sk_rmem_alloc) > (sk->sk_rcvbuf >> 1);
@@ -1931,8 +1939,17 @@ static void do_one_broadcast(struct sock *sk,
!test_bit(p->group - 1, nlk->groups))
return;
- if (!net_eq(sock_net(sk), p->net))
- return;
+ if (!net_eq(sock_net(sk), p->net)) {
+ if (!(nlk->flags & NETLINK_F_LISTEN_ALL_NSID))
+ return;
+
+ if (!peernet_has_id(sock_net(sk), p->net))
+ return;
+
+ if (!file_ns_capable(sk->sk_socket->file, p->net->user_ns,
+ CAP_NET_BROADCAST))
+ return;
+ }
if (p->failure) {
netlink_overrun(sk);
@@ -1956,23 +1973,33 @@ static void do_one_broadcast(struct sock *sk,
netlink_overrun(sk);
/* Clone failed. Notify ALL listeners. */
p->failure = 1;
- if (nlk->flags & NETLINK_BROADCAST_SEND_ERROR)
+ if (nlk->flags & NETLINK_F_BROADCAST_SEND_ERROR)
p->delivery_failure = 1;
- } else if (p->tx_filter && p->tx_filter(sk, p->skb2, p->tx_data)) {
+ goto out;
+ }
+ if (p->tx_filter && p->tx_filter(sk, p->skb2, p->tx_data)) {
kfree_skb(p->skb2);
p->skb2 = NULL;
- } else if (sk_filter(sk, p->skb2)) {
+ goto out;
+ }
+ if (sk_filter(sk, p->skb2)) {
kfree_skb(p->skb2);
p->skb2 = NULL;
- } else if ((val = netlink_broadcast_deliver(sk, p->skb2)) < 0) {
+ goto out;
+ }
+ NETLINK_CB(p->skb2).nsid = peernet2id(sock_net(sk), p->net);
+ NETLINK_CB(p->skb2).nsid_is_set = true;
+ val = netlink_broadcast_deliver(sk, p->skb2);
+ if (val < 0) {
netlink_overrun(sk);
- if (nlk->flags & NETLINK_BROADCAST_SEND_ERROR)
+ if (nlk->flags & NETLINK_F_BROADCAST_SEND_ERROR)
p->delivery_failure = 1;
} else {
p->congested |= val;
p->delivered = 1;
p->skb2 = NULL;
}
+out:
sock_put(sk);
}
@@ -2057,7 +2084,7 @@ static int do_one_set_err(struct sock *sk, struct netlink_set_err_data *p)
!test_bit(p->group - 1, nlk->groups))
goto out;
- if (p->code == ENOBUFS && nlk->flags & NETLINK_RECV_NO_ENOBUFS) {
+ if (p->code == ENOBUFS && nlk->flags & NETLINK_F_RECV_NO_ENOBUFS) {
ret = 1;
goto out;
}
@@ -2076,7 +2103,7 @@ out:
* @code: error code, must be negative (as usual in kernelspace)
*
* This function returns the number of broadcast listeners that have set the
- * NETLINK_RECV_NO_ENOBUFS socket option.
+ * NETLINK_NO_ENOBUFS socket option.
*/
int netlink_set_err(struct sock *ssk, u32 portid, u32 group, int code)
{
@@ -2136,9 +2163,9 @@ static int netlink_setsockopt(struct socket *sock, int level, int optname,
switch (optname) {
case NETLINK_PKTINFO:
if (val)
- nlk->flags |= NETLINK_RECV_PKTINFO;
+ nlk->flags |= NETLINK_F_RECV_PKTINFO;
else
- nlk->flags &= ~NETLINK_RECV_PKTINFO;
+ nlk->flags &= ~NETLINK_F_RECV_PKTINFO;
err = 0;
break;
case NETLINK_ADD_MEMBERSHIP:
@@ -2167,18 +2194,18 @@ static int netlink_setsockopt(struct socket *sock, int level, int optname,
}
case NETLINK_BROADCAST_ERROR:
if (val)
- nlk->flags |= NETLINK_BROADCAST_SEND_ERROR;
+ nlk->flags |= NETLINK_F_BROADCAST_SEND_ERROR;
else
- nlk->flags &= ~NETLINK_BROADCAST_SEND_ERROR;
+ nlk->flags &= ~NETLINK_F_BROADCAST_SEND_ERROR;
err = 0;
break;
case NETLINK_NO_ENOBUFS:
if (val) {
- nlk->flags |= NETLINK_RECV_NO_ENOBUFS;
- clear_bit(NETLINK_CONGESTED, &nlk->state);
+ nlk->flags |= NETLINK_F_RECV_NO_ENOBUFS;
+ clear_bit(NETLINK_S_CONGESTED, &nlk->state);
wake_up_interruptible(&nlk->wait);
} else {
- nlk->flags &= ~NETLINK_RECV_NO_ENOBUFS;
+ nlk->flags &= ~NETLINK_F_RECV_NO_ENOBUFS;
}
err = 0;
break;
@@ -2201,6 +2228,16 @@ static int netlink_setsockopt(struct socket *sock, int level, int optname,
break;
}
#endif /* CONFIG_NETLINK_MMAP */
+ case NETLINK_LISTEN_ALL_NSID:
+ if (!ns_capable(sock_net(sk)->user_ns, CAP_NET_BROADCAST))
+ return -EPERM;
+
+ if (val)
+ nlk->flags |= NETLINK_F_LISTEN_ALL_NSID;
+ else
+ nlk->flags &= ~NETLINK_F_LISTEN_ALL_NSID;
+ err = 0;
+ break;
default:
err = -ENOPROTOOPT;
}
@@ -2227,7 +2264,7 @@ static int netlink_getsockopt(struct socket *sock, int level, int optname,
if (len < sizeof(int))
return -EINVAL;
len = sizeof(int);
- val = nlk->flags & NETLINK_RECV_PKTINFO ? 1 : 0;
+ val = nlk->flags & NETLINK_F_RECV_PKTINFO ? 1 : 0;
if (put_user(len, optlen) ||
put_user(val, optval))
return -EFAULT;
@@ -2237,7 +2274,7 @@ static int netlink_getsockopt(struct socket *sock, int level, int optname,
if (len < sizeof(int))
return -EINVAL;
len = sizeof(int);
- val = nlk->flags & NETLINK_BROADCAST_SEND_ERROR ? 1 : 0;
+ val = nlk->flags & NETLINK_F_BROADCAST_SEND_ERROR ? 1 : 0;
if (put_user(len, optlen) ||
put_user(val, optval))
return -EFAULT;
@@ -2247,7 +2284,7 @@ static int netlink_getsockopt(struct socket *sock, int level, int optname,
if (len < sizeof(int))
return -EINVAL;
len = sizeof(int);
- val = nlk->flags & NETLINK_RECV_NO_ENOBUFS ? 1 : 0;
+ val = nlk->flags & NETLINK_F_RECV_NO_ENOBUFS ? 1 : 0;
if (put_user(len, optlen) ||
put_user(val, optval))
return -EFAULT;
@@ -2267,6 +2304,16 @@ static void netlink_cmsg_recv_pktinfo(struct msghdr *msg, struct sk_buff *skb)
put_cmsg(msg, SOL_NETLINK, NETLINK_PKTINFO, sizeof(info), &info);
}
+static void netlink_cmsg_listen_all_nsid(struct sock *sk, struct msghdr *msg,
+ struct sk_buff *skb)
+{
+ if (!NETLINK_CB(skb).nsid_is_set)
+ return;
+
+ put_cmsg(msg, SOL_NETLINK, NETLINK_LISTEN_ALL_NSID, sizeof(int),
+ &NETLINK_CB(skb).nsid);
+}
+
static int netlink_sendmsg(struct socket *sock, struct msghdr *msg, size_t len)
{
struct sock *sk = sock->sk;
@@ -2418,8 +2465,10 @@ static int netlink_recvmsg(struct socket *sock, struct msghdr *msg, size_t len,
msg->msg_namelen = sizeof(*addr);
}
- if (nlk->flags & NETLINK_RECV_PKTINFO)
+ if (nlk->flags & NETLINK_F_RECV_PKTINFO)
netlink_cmsg_recv_pktinfo(msg, skb);
+ if (nlk->flags & NETLINK_F_LISTEN_ALL_NSID)
+ netlink_cmsg_listen_all_nsid(sk, msg, skb);
memset(&scm, 0, sizeof(scm));
scm.creds = *NETLINK_CREDS(skb);
@@ -2473,17 +2522,10 @@ __netlink_kernel_create(struct net *net, int unit, struct module *module,
if (sock_create_lite(PF_NETLINK, SOCK_DGRAM, unit, &sock))
return NULL;
- /*
- * We have to just have a reference on the net from sk, but don't
- * get_net it. Besides, we cannot get and then put the net here.
- * So we create one inside init_net and the move it to net.
- */
-
- if (__netlink_create(&init_net, sock, cb_mutex, unit) < 0)
+ if (__netlink_create(net, sock, cb_mutex, unit, 1) < 0)
goto out_sock_release_nosk;
sk = sock->sk;
- sk_change_net(sk, net);
if (!cfg || cfg->groups < 32)
groups = 32;
@@ -2502,7 +2544,7 @@ __netlink_kernel_create(struct net *net, int unit, struct module *module,
goto out_sock_release;
nlk = nlk_sk(sk);
- nlk->flags |= NETLINK_KERNEL_SOCKET;
+ nlk->flags |= NETLINK_F_KERNEL_SOCKET;
netlink_table_grab();
if (!nl_table[unit].registered) {
@@ -2539,7 +2581,10 @@ EXPORT_SYMBOL(__netlink_kernel_create);
void
netlink_kernel_release(struct sock *sk)
{
- sk_release_kernel(sk);
+ if (sk == NULL || sk->sk_socket == NULL)
+ return;
+
+ sock_release(sk->sk_socket);
}
EXPORT_SYMBOL(netlink_kernel_release);
@@ -3139,7 +3184,6 @@ static const struct rhashtable_params netlink_rhashtable_params = {
.key_len = netlink_compare_arg_len,
.obj_hashfn = netlink_hash,
.obj_cmpfn = netlink_compare,
- .max_size = 65536,
.automatic_shrinking = true,
};
diff --git a/net/netrom/af_netrom.c b/net/netrom/af_netrom.c
index b987fd56c3c5..ed212ffc1d9d 100644
--- a/net/netrom/af_netrom.c
+++ b/net/netrom/af_netrom.c
@@ -433,7 +433,7 @@ static int nr_create(struct net *net, struct socket *sock, int protocol,
if (sock->type != SOCK_SEQPACKET || protocol != 0)
return -ESOCKTNOSUPPORT;
- sk = sk_alloc(net, PF_NETROM, GFP_ATOMIC, &nr_proto);
+ sk = sk_alloc(net, PF_NETROM, GFP_ATOMIC, &nr_proto, kern);
if (sk == NULL)
return -ENOMEM;
@@ -476,7 +476,7 @@ static struct sock *nr_make_new(struct sock *osk)
if (osk->sk_type != SOCK_SEQPACKET)
return NULL;
- sk = sk_alloc(sock_net(osk), PF_NETROM, GFP_ATOMIC, osk->sk_prot);
+ sk = sk_alloc(sock_net(osk), PF_NETROM, GFP_ATOMIC, osk->sk_prot, 0);
if (sk == NULL)
return NULL;
diff --git a/net/nfc/af_nfc.c b/net/nfc/af_nfc.c
index 2277276f52bc..54e40fa47822 100644
--- a/net/nfc/af_nfc.c
+++ b/net/nfc/af_nfc.c
@@ -40,7 +40,7 @@ static int nfc_sock_create(struct net *net, struct socket *sock, int proto,
read_lock(&proto_tab_lock);
if (proto_tab[proto] && try_module_get(proto_tab[proto]->owner)) {
- rc = proto_tab[proto]->create(net, sock, proto_tab[proto]);
+ rc = proto_tab[proto]->create(net, sock, proto_tab[proto], kern);
module_put(proto_tab[proto]->owner);
}
read_unlock(&proto_tab_lock);
diff --git a/net/nfc/llcp.h b/net/nfc/llcp.h
index de1789e3cc82..1f68724d44d3 100644
--- a/net/nfc/llcp.h
+++ b/net/nfc/llcp.h
@@ -225,7 +225,7 @@ void nfc_llcp_send_to_raw_sock(struct nfc_llcp_local *local,
struct sk_buff *skb, u8 direction);
/* Sock API */
-struct sock *nfc_llcp_sock_alloc(struct socket *sock, int type, gfp_t gfp);
+struct sock *nfc_llcp_sock_alloc(struct socket *sock, int type, gfp_t gfp, int kern);
void nfc_llcp_sock_free(struct nfc_llcp_sock *sock);
void nfc_llcp_accept_unlink(struct sock *sk);
void nfc_llcp_accept_enqueue(struct sock *parent, struct sock *sk);
diff --git a/net/nfc/llcp_core.c b/net/nfc/llcp_core.c
index b18f07ccb504..98876274a1ee 100644
--- a/net/nfc/llcp_core.c
+++ b/net/nfc/llcp_core.c
@@ -934,7 +934,7 @@ static void nfc_llcp_recv_connect(struct nfc_llcp_local *local,
sock->ssap = ssap;
}
- new_sk = nfc_llcp_sock_alloc(NULL, parent->sk_type, GFP_ATOMIC);
+ new_sk = nfc_llcp_sock_alloc(NULL, parent->sk_type, GFP_ATOMIC, 0);
if (new_sk == NULL) {
reason = LLCP_DM_REJ;
release_sock(&sock->sk);
diff --git a/net/nfc/llcp_sock.c b/net/nfc/llcp_sock.c
index 9578bd6a4f3e..b7de0da46acd 100644
--- a/net/nfc/llcp_sock.c
+++ b/net/nfc/llcp_sock.c
@@ -942,12 +942,12 @@ static void llcp_sock_destruct(struct sock *sk)
}
}
-struct sock *nfc_llcp_sock_alloc(struct socket *sock, int type, gfp_t gfp)
+struct sock *nfc_llcp_sock_alloc(struct socket *sock, int type, gfp_t gfp, int kern)
{
struct sock *sk;
struct nfc_llcp_sock *llcp_sock;
- sk = sk_alloc(&init_net, PF_NFC, gfp, &llcp_sock_proto);
+ sk = sk_alloc(&init_net, PF_NFC, gfp, &llcp_sock_proto, kern);
if (!sk)
return NULL;
@@ -993,7 +993,7 @@ void nfc_llcp_sock_free(struct nfc_llcp_sock *sock)
}
static int llcp_sock_create(struct net *net, struct socket *sock,
- const struct nfc_protocol *nfc_proto)
+ const struct nfc_protocol *nfc_proto, int kern)
{
struct sock *sk;
@@ -1009,7 +1009,7 @@ static int llcp_sock_create(struct net *net, struct socket *sock,
else
sock->ops = &llcp_sock_ops;
- sk = nfc_llcp_sock_alloc(sock, sock->type, GFP_ATOMIC);
+ sk = nfc_llcp_sock_alloc(sock, sock->type, GFP_ATOMIC, kern);
if (sk == NULL)
return -ENOMEM;
diff --git a/net/nfc/nfc.h b/net/nfc/nfc.h
index a8ce80b47720..5c93e8412a26 100644
--- a/net/nfc/nfc.h
+++ b/net/nfc/nfc.h
@@ -30,7 +30,7 @@ struct nfc_protocol {
struct proto *proto;
struct module *owner;
int (*create)(struct net *net, struct socket *sock,
- const struct nfc_protocol *nfc_proto);
+ const struct nfc_protocol *nfc_proto, int kern);
};
struct nfc_rawsock {
diff --git a/net/nfc/rawsock.c b/net/nfc/rawsock.c
index 82b4e8024778..e9a91488fe3d 100644
--- a/net/nfc/rawsock.c
+++ b/net/nfc/rawsock.c
@@ -334,7 +334,7 @@ static void rawsock_destruct(struct sock *sk)
}
static int rawsock_create(struct net *net, struct socket *sock,
- const struct nfc_protocol *nfc_proto)
+ const struct nfc_protocol *nfc_proto, int kern)
{
struct sock *sk;
@@ -348,7 +348,7 @@ static int rawsock_create(struct net *net, struct socket *sock,
else
sock->ops = &rawsock_ops;
- sk = sk_alloc(net, PF_NFC, GFP_ATOMIC, nfc_proto->proto);
+ sk = sk_alloc(net, PF_NFC, GFP_ATOMIC, nfc_proto->proto, kern);
if (!sk)
return -ENOMEM;
diff --git a/net/openvswitch/Kconfig b/net/openvswitch/Kconfig
index ed6b0f8dd1bb..15840401a2ce 100644
--- a/net/openvswitch/Kconfig
+++ b/net/openvswitch/Kconfig
@@ -59,7 +59,7 @@ config OPENVSWITCH_VXLAN
config OPENVSWITCH_GENEVE
tristate "Open vSwitch Geneve tunneling support"
depends on OPENVSWITCH
- depends on GENEVE
+ depends on GENEVE_CORE
default OPENVSWITCH
---help---
If you say Y here, then the Open vSwitch will be able create geneve vport.
diff --git a/net/openvswitch/datapath.c b/net/openvswitch/datapath.c
index 096c6276e6b9..3b90461317ec 100644
--- a/net/openvswitch/datapath.c
+++ b/net/openvswitch/datapath.c
@@ -545,7 +545,7 @@ static int ovs_packet_cmd_execute(struct sk_buff *skb, struct genl_info *info)
/* Normally, setting the skb 'protocol' field would be handled by a
* call to eth_type_trans(), but it assumes there's a sending
* device, which we may not have. */
- if (ntohs(eth->h_proto) >= ETH_P_802_3_MIN)
+ if (eth_proto_is_802_3(eth->h_proto))
packet->protocol = eth->h_proto;
else
packet->protocol = htons(ETH_P_802_2);
diff --git a/net/openvswitch/flow.c b/net/openvswitch/flow.c
index 2dacc7b5af23..bc7b0aba994a 100644
--- a/net/openvswitch/flow.c
+++ b/net/openvswitch/flow.c
@@ -332,7 +332,7 @@ static __be16 parse_ethertype(struct sk_buff *skb)
proto = *(__be16 *) skb->data;
__skb_pull(skb, sizeof(__be16));
- if (ntohs(proto) >= ETH_P_802_3_MIN)
+ if (eth_proto_is_802_3(proto))
return proto;
if (skb->len < sizeof(struct llc_snap_hdr))
@@ -349,7 +349,7 @@ static __be16 parse_ethertype(struct sk_buff *skb)
__skb_pull(skb, sizeof(struct llc_snap_hdr));
- if (ntohs(llc->ethertype) >= ETH_P_802_3_MIN)
+ if (eth_proto_is_802_3(llc->ethertype))
return llc->ethertype;
return htons(ETH_P_802_2);
diff --git a/net/openvswitch/flow_netlink.c b/net/openvswitch/flow_netlink.c
index c691b1a1eee0..624e41c4267f 100644
--- a/net/openvswitch/flow_netlink.c
+++ b/net/openvswitch/flow_netlink.c
@@ -816,7 +816,7 @@ static int ovs_key_from_nlattrs(struct sw_flow_match *match, u64 attrs,
if (is_mask) {
/* Always exact match EtherType. */
eth_type = htons(0xffff);
- } else if (ntohs(eth_type) < ETH_P_802_3_MIN) {
+ } else if (!eth_proto_is_802_3(eth_type)) {
OVS_NLERR(log, "EtherType %x is less than min %x",
ntohs(eth_type), ETH_P_802_3_MIN);
return -EINVAL;
diff --git a/net/openvswitch/vport-geneve.c b/net/openvswitch/vport-geneve.c
index bf02fd5808c9..208c576bd1b6 100644
--- a/net/openvswitch/vport-geneve.c
+++ b/net/openvswitch/vport-geneve.c
@@ -46,11 +46,6 @@ static inline struct geneve_port *geneve_vport(const struct vport *vport)
return vport_priv(vport);
}
-static inline struct genevehdr *geneve_hdr(const struct sk_buff *skb)
-{
- return (struct genevehdr *)(udp_hdr(skb) + 1);
-}
-
/* Convert 64 bit tunnel ID to 24 bit VNI. */
static void tunnel_id_to_vni(__be64 tun_id, __u8 *vni)
{
diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c
index 5102c3cc4eec..fd5164139bf0 100644
--- a/net/packet/af_packet.c
+++ b/net/packet/af_packet.c
@@ -1234,27 +1234,81 @@ static void packet_free_pending(struct packet_sock *po)
free_percpu(po->tx_ring.pending_refcnt);
}
-static bool packet_rcv_has_room(struct packet_sock *po, struct sk_buff *skb)
+#define ROOM_POW_OFF 2
+#define ROOM_NONE 0x0
+#define ROOM_LOW 0x1
+#define ROOM_NORMAL 0x2
+
+static bool __tpacket_has_room(struct packet_sock *po, int pow_off)
+{
+ int idx, len;
+
+ len = po->rx_ring.frame_max + 1;
+ idx = po->rx_ring.head;
+ if (pow_off)
+ idx += len >> pow_off;
+ if (idx >= len)
+ idx -= len;
+ return packet_lookup_frame(po, &po->rx_ring, idx, TP_STATUS_KERNEL);
+}
+
+static bool __tpacket_v3_has_room(struct packet_sock *po, int pow_off)
+{
+ int idx, len;
+
+ len = po->rx_ring.prb_bdqc.knum_blocks;
+ idx = po->rx_ring.prb_bdqc.kactive_blk_num;
+ if (pow_off)
+ idx += len >> pow_off;
+ if (idx >= len)
+ idx -= len;
+ return prb_lookup_block(po, &po->rx_ring, idx, TP_STATUS_KERNEL);
+}
+
+static int __packet_rcv_has_room(struct packet_sock *po, struct sk_buff *skb)
{
struct sock *sk = &po->sk;
- bool has_room;
+ int ret = ROOM_NONE;
+
+ if (po->prot_hook.func != tpacket_rcv) {
+ int avail = sk->sk_rcvbuf - atomic_read(&sk->sk_rmem_alloc)
+ - (skb ? skb->truesize : 0);
+ if (avail > (sk->sk_rcvbuf >> ROOM_POW_OFF))
+ return ROOM_NORMAL;
+ else if (avail > 0)
+ return ROOM_LOW;
+ else
+ return ROOM_NONE;
+ }
- if (po->prot_hook.func != tpacket_rcv)
- return (atomic_read(&sk->sk_rmem_alloc) + skb->truesize)
- <= sk->sk_rcvbuf;
+ if (po->tp_version == TPACKET_V3) {
+ if (__tpacket_v3_has_room(po, ROOM_POW_OFF))
+ ret = ROOM_NORMAL;
+ else if (__tpacket_v3_has_room(po, 0))
+ ret = ROOM_LOW;
+ } else {
+ if (__tpacket_has_room(po, ROOM_POW_OFF))
+ ret = ROOM_NORMAL;
+ else if (__tpacket_has_room(po, 0))
+ ret = ROOM_LOW;
+ }
- spin_lock(&sk->sk_receive_queue.lock);
- if (po->tp_version == TPACKET_V3)
- has_room = prb_lookup_block(po, &po->rx_ring,
- po->rx_ring.prb_bdqc.kactive_blk_num,
- TP_STATUS_KERNEL);
- else
- has_room = packet_lookup_frame(po, &po->rx_ring,
- po->rx_ring.head,
- TP_STATUS_KERNEL);
- spin_unlock(&sk->sk_receive_queue.lock);
+ return ret;
+}
- return has_room;
+static int packet_rcv_has_room(struct packet_sock *po, struct sk_buff *skb)
+{
+ int ret;
+ bool has_room;
+
+ spin_lock_bh(&po->sk.sk_receive_queue.lock);
+ ret = __packet_rcv_has_room(po, skb);
+ has_room = ret == ROOM_NORMAL;
+ if (po->pressure == has_room)
+ po->pressure = !has_room;
+ spin_unlock_bh(&po->sk.sk_receive_queue.lock);
+
+ return ret;
}
static void packet_sock_destruct(struct sock *sk)
@@ -1282,6 +1336,20 @@ static int fanout_rr_next(struct packet_fanout *f, unsigned int num)
return x;
}
+static bool fanout_flow_is_huge(struct packet_sock *po, struct sk_buff *skb)
+{
+ u32 rxhash;
+ int i, count = 0;
+
+ rxhash = skb_get_hash(skb);
+ for (i = 0; i < ROLLOVER_HLEN; i++)
+ if (po->rollover->history[i] == rxhash)
+ count++;
+
+ po->rollover->history[prandom_u32() % ROLLOVER_HLEN] = rxhash;
+ return count > (ROLLOVER_HLEN >> 1);
+}
+
static unsigned int fanout_demux_hash(struct packet_fanout *f,
struct sk_buff *skb,
unsigned int num)
@@ -1318,22 +1386,40 @@ static unsigned int fanout_demux_rnd(struct packet_fanout *f,
static unsigned int fanout_demux_rollover(struct packet_fanout *f,
struct sk_buff *skb,
- unsigned int idx, unsigned int skip,
+ unsigned int idx, bool try_self,
unsigned int num)
{
- unsigned int i, j;
+ struct packet_sock *po, *po_next, *po_skip = NULL;
+ unsigned int i, j, room = ROOM_NONE;
+
+ po = pkt_sk(f->arr[idx]);
+
+ if (try_self) {
+ room = packet_rcv_has_room(po, skb);
+ if (room == ROOM_NORMAL ||
+ (room == ROOM_LOW && !fanout_flow_is_huge(po, skb)))
+ return idx;
+ po_skip = po;
+ }
- i = j = min_t(int, f->next[idx], num - 1);
+ i = j = min_t(int, po->rollover->sock, num - 1);
do {
- if (i != skip && packet_rcv_has_room(pkt_sk(f->arr[i]), skb)) {
+ po_next = pkt_sk(f->arr[i]);
+ if (po_next != po_skip && !po_next->pressure &&
+ packet_rcv_has_room(po_next, skb) == ROOM_NORMAL) {
if (i != j)
- f->next[idx] = i;
+ po->rollover->sock = i;
+ atomic_long_inc(&po->rollover->num);
+ if (room == ROOM_LOW)
+ atomic_long_inc(&po->rollover->num_huge);
return i;
}
+
if (++i == num)
i = 0;
} while (i != j);
+ atomic_long_inc(&po->rollover->num_failed);
return idx;
}
@@ -1386,17 +1472,14 @@ static int packet_rcv_fanout(struct sk_buff *skb, struct net_device *dev,
idx = fanout_demux_qm(f, skb, num);
break;
case PACKET_FANOUT_ROLLOVER:
- idx = fanout_demux_rollover(f, skb, 0, (unsigned int) -1, num);
+ idx = fanout_demux_rollover(f, skb, 0, false, num);
break;
}
- po = pkt_sk(f->arr[idx]);
- if (fanout_has_flag(f, PACKET_FANOUT_FLAG_ROLLOVER) &&
- unlikely(!packet_rcv_has_room(po, skb))) {
- idx = fanout_demux_rollover(f, skb, idx, idx, num);
- po = pkt_sk(f->arr[idx]);
- }
+ if (fanout_has_flag(f, PACKET_FANOUT_FLAG_ROLLOVER))
+ idx = fanout_demux_rollover(f, skb, idx, true, num);
+ po = pkt_sk(f->arr[idx]);
return po->prot_hook.func(skb, dev, &po->prot_hook, orig_dev);
}
@@ -1467,6 +1550,16 @@ static int fanout_add(struct sock *sk, u16 id, u16 type_flags)
if (po->fanout)
return -EALREADY;
+ if (type == PACKET_FANOUT_ROLLOVER ||
+ (type_flags & PACKET_FANOUT_FLAG_ROLLOVER)) {
+ po->rollover = kzalloc(sizeof(*po->rollover), GFP_KERNEL);
+ if (!po->rollover)
+ return -ENOMEM;
+ atomic_long_set(&po->rollover->num, 0);
+ atomic_long_set(&po->rollover->num_huge, 0);
+ atomic_long_set(&po->rollover->num_failed, 0);
+ }
+
mutex_lock(&fanout_mutex);
match = NULL;
list_for_each_entry(f, &fanout_list, list) {
@@ -1515,6 +1608,10 @@ static int fanout_add(struct sock *sk, u16 id, u16 type_flags)
}
out:
mutex_unlock(&fanout_mutex);
+ if (err) {
+ kfree(po->rollover);
+ po->rollover = NULL;
+ }
return err;
}
@@ -1536,6 +1633,8 @@ static void fanout_release(struct sock *sk)
kfree(f);
}
mutex_unlock(&fanout_mutex);
+
+ kfree(po->rollover);
}
static const struct proto_ops packet_ops;
@@ -2311,11 +2410,14 @@ static int tpacket_snd(struct packet_sock *po, struct msghdr *msg)
tlen = dev->needed_tailroom;
skb = sock_alloc_send_skb(&po->sk,
hlen + tlen + sizeof(struct sockaddr_ll),
- 0, &err);
+ !need_wait, &err);
- if (unlikely(skb == NULL))
+ if (unlikely(skb == NULL)) {
+ /* we assume the socket was initially writeable ... */
+ if (likely(len_sum > 0))
+ err = len_sum;
goto out_status;
-
+ }
tp_len = tpacket_fill_skb(po, skb, ph, dev, size_max, proto,
addr, hlen);
if (tp_len > dev->mtu + dev->hard_header_len) {
@@ -2832,7 +2934,7 @@ static int packet_create(struct net *net, struct socket *sock, int protocol,
sock->state = SS_UNCONNECTED;
err = -ENOBUFS;
- sk = sk_alloc(net, PF_PACKET, GFP_KERNEL, &packet_proto);
+ sk = sk_alloc(net, PF_PACKET, GFP_KERNEL, &packet_proto, kern);
if (sk == NULL)
goto out;
@@ -2862,6 +2964,7 @@ static int packet_create(struct net *net, struct socket *sock, int protocol,
spin_lock_init(&po->bind_lock);
mutex_init(&po->pg_vec_lock);
+ po->rollover = NULL;
po->prot_hook.func = packet_rcv;
if (sock->type == SOCK_PACKET)
@@ -2939,6 +3042,9 @@ static int packet_recvmsg(struct socket *sock, struct msghdr *msg, size_t len,
if (skb == NULL)
goto out;
+ if (pkt_sk(sk)->pressure)
+ packet_rcv_has_room(pkt_sk(sk), NULL);
+
if (pkt_sk(sk)->has_vnet_hdr) {
struct virtio_net_hdr vnet_hdr = { 0 };
@@ -3482,6 +3588,7 @@ static int packet_getsockopt(struct socket *sock, int level, int optname,
struct packet_sock *po = pkt_sk(sk);
void *data = &val;
union tpacket_stats_u st;
+ struct tpacket_rollover_stats rstats;
if (level != SOL_PACKET)
return -ENOPROTOOPT;
@@ -3557,6 +3664,15 @@ static int packet_getsockopt(struct socket *sock, int level, int optname,
((u32)po->fanout->flags << 24)) :
0);
break;
+ case PACKET_ROLLOVER_STATS:
+ if (!po->rollover)
+ return -EINVAL;
+ rstats.tp_all = atomic_long_read(&po->rollover->num);
+ rstats.tp_huge = atomic_long_read(&po->rollover->num_huge);
+ rstats.tp_failed = atomic_long_read(&po->rollover->num_failed);
+ data = &rstats;
+ lv = sizeof(rstats);
+ break;
case PACKET_TX_HAS_OFF:
val = po->tp_tx_has_off;
break;
@@ -3694,6 +3810,8 @@ static unsigned int packet_poll(struct file *file, struct socket *sock,
TP_STATUS_KERNEL))
mask |= POLLIN | POLLRDNORM;
}
+ if (po->pressure && __packet_rcv_has_room(po, NULL) == ROOM_NORMAL)
+ po->pressure = 0;
spin_unlock_bh(&sk->sk_receive_queue.lock);
spin_lock_bh(&sk->sk_write_queue.lock);
if (po->tx_ring.pg_vec) {
diff --git a/net/packet/internal.h b/net/packet/internal.h
index fe6e20caea1d..c035d263c1e8 100644
--- a/net/packet/internal.h
+++ b/net/packet/internal.h
@@ -82,12 +82,20 @@ struct packet_fanout {
atomic_t rr_cur;
struct list_head list;
struct sock *arr[PACKET_FANOUT_MAX];
- int next[PACKET_FANOUT_MAX];
spinlock_t lock;
atomic_t sk_ref;
struct packet_type prot_hook ____cacheline_aligned_in_smp;
};
+struct packet_rollover {
+ int sock;
+ atomic_long_t num;
+ atomic_long_t num_huge;
+ atomic_long_t num_failed;
+#define ROLLOVER_HLEN (L1_CACHE_BYTES / sizeof(u32))
+ u32 history[ROLLOVER_HLEN] ____cacheline_aligned;
+} ____cacheline_aligned_in_smp;
+
struct packet_sock {
/* struct sock has to be the first member of packet_sock */
struct sock sk;
@@ -102,8 +110,10 @@ struct packet_sock {
auxdata:1,
origdev:1,
has_vnet_hdr:1;
+ int pressure;
int ifindex; /* bound device */
__be16 num;
+ struct packet_rollover *rollover;
struct packet_mclist *mclist;
atomic_t mapped;
enum tpacket_versions tp_version;
diff --git a/net/phonet/af_phonet.c b/net/phonet/af_phonet.c
index 32ab87d34828..10d42f3220ab 100644
--- a/net/phonet/af_phonet.c
+++ b/net/phonet/af_phonet.c
@@ -97,7 +97,7 @@ static int pn_socket_create(struct net *net, struct socket *sock, int protocol,
goto out;
}
- sk = sk_alloc(net, PF_PHONET, GFP_KERNEL, pnp->prot);
+ sk = sk_alloc(net, PF_PHONET, GFP_KERNEL, pnp->prot, kern);
if (sk == NULL) {
err = -ENOMEM;
goto out;
diff --git a/net/phonet/pep.c b/net/phonet/pep.c
index 6de2aeb98a1f..850a86cde0b3 100644
--- a/net/phonet/pep.c
+++ b/net/phonet/pep.c
@@ -845,7 +845,7 @@ static struct sock *pep_sock_accept(struct sock *sk, int flags, int *errp)
}
/* Create a new to-be-accepted sock */
- newsk = sk_alloc(sock_net(sk), PF_PHONET, GFP_KERNEL, sk->sk_prot);
+ newsk = sk_alloc(sock_net(sk), PF_PHONET, GFP_KERNEL, sk->sk_prot, 0);
if (!newsk) {
pep_reject_conn(sk, skb, PN_PIPE_ERR_OVERLOAD, GFP_KERNEL);
err = -ENOBUFS;
diff --git a/net/rds/af_rds.c b/net/rds/af_rds.c
index 10443377fb9d..3d83641f2861 100644
--- a/net/rds/af_rds.c
+++ b/net/rds/af_rds.c
@@ -440,7 +440,7 @@ static int rds_create(struct net *net, struct socket *sock, int protocol,
if (sock->type != SOCK_SEQPACKET || protocol)
return -ESOCKTNOSUPPORT;
- sk = sk_alloc(net, AF_RDS, GFP_ATOMIC, &rds_proto);
+ sk = sk_alloc(net, AF_RDS, GFP_ATOMIC, &rds_proto, kern);
if (!sk)
return -ENOMEM;
diff --git a/net/rds/connection.c b/net/rds/connection.c
index 14f041398ca1..da6da57e5f36 100644
--- a/net/rds/connection.c
+++ b/net/rds/connection.c
@@ -126,7 +126,10 @@ static struct rds_connection *__rds_conn_create(__be32 laddr, __be32 faddr,
struct rds_transport *loop_trans;
unsigned long flags;
int ret;
+ struct rds_transport *otrans = trans;
+ if (!is_outgoing && otrans->t_type == RDS_TRANS_TCP)
+ goto new_conn;
rcu_read_lock();
conn = rds_conn_lookup(head, laddr, faddr, trans);
if (conn && conn->c_loopback && conn->c_trans != &rds_loop_transport &&
@@ -142,6 +145,7 @@ static struct rds_connection *__rds_conn_create(__be32 laddr, __be32 faddr,
if (conn)
goto out;
+new_conn:
conn = kmem_cache_zalloc(rds_conn_slab, gfp);
if (!conn) {
conn = ERR_PTR(-ENOMEM);
@@ -230,13 +234,22 @@ static struct rds_connection *__rds_conn_create(__be32 laddr, __be32 faddr,
/* Creating normal conn */
struct rds_connection *found;
- found = rds_conn_lookup(head, laddr, faddr, trans);
+ if (!is_outgoing && otrans->t_type == RDS_TRANS_TCP)
+ found = NULL;
+ else
+ found = rds_conn_lookup(head, laddr, faddr, trans);
if (found) {
trans->conn_free(conn->c_transport_data);
kmem_cache_free(rds_conn_slab, conn);
conn = found;
} else {
- hlist_add_head_rcu(&conn->c_hash_node, head);
+ if ((is_outgoing && otrans->t_type == RDS_TRANS_TCP) ||
+ (otrans->t_type != RDS_TRANS_TCP)) {
+ /* Only the active side should be added to
+ * reconnect list for TCP.
+ */
+ hlist_add_head_rcu(&conn->c_hash_node, head);
+ }
rds_cong_add_conn(conn);
rds_conn_count++;
}
diff --git a/net/rds/ib_cm.c b/net/rds/ib_cm.c
index 31b74f5e61ad..8a09ee7db3c1 100644
--- a/net/rds/ib_cm.c
+++ b/net/rds/ib_cm.c
@@ -183,8 +183,17 @@ void rds_ib_cm_connect_complete(struct rds_connection *conn, struct rdma_cm_even
/* If the peer gave us the last packet it saw, process this as if
* we had received a regular ACK. */
- if (dp && dp->dp_ack_seq)
- rds_send_drop_acked(conn, be64_to_cpu(dp->dp_ack_seq), NULL);
+ if (dp) {
+ /* dp structure start is not guaranteed to be 8 bytes aligned.
+ * Since dp_ack_seq is 64-bit extended load operations can be
+ * used so go through get_unaligned to avoid unaligned errors.
+ */
+ __be64 dp_ack_seq = get_unaligned(&dp->dp_ack_seq);
+
+ if (dp_ack_seq)
+ rds_send_drop_acked(conn, be64_to_cpu(dp_ack_seq),
+ NULL);
+ }
rds_connect_complete(conn);
}
diff --git a/net/rds/tcp_connect.c b/net/rds/tcp_connect.c
index f9f564a6c960..973109c7b8e8 100644
--- a/net/rds/tcp_connect.c
+++ b/net/rds/tcp_connect.c
@@ -62,6 +62,7 @@ void rds_tcp_state_change(struct sock *sk)
case TCP_ESTABLISHED:
rds_connect_complete(conn);
break;
+ case TCP_CLOSE_WAIT:
case TCP_CLOSE:
rds_conn_drop(conn);
default:
diff --git a/net/rds/tcp_listen.c b/net/rds/tcp_listen.c
index 23ab4dcd1d9f..0da49e34495f 100644
--- a/net/rds/tcp_listen.c
+++ b/net/rds/tcp_listen.c
@@ -45,12 +45,45 @@ static void rds_tcp_accept_worker(struct work_struct *work);
static DECLARE_WORK(rds_tcp_listen_work, rds_tcp_accept_worker);
static struct socket *rds_tcp_listen_sock;
+static int rds_tcp_keepalive(struct socket *sock)
+{
+ /* values below based on xs_udp_default_timeout */
+ int keepidle = 5; /* send a probe 'keepidle' secs after last data */
+ int keepcnt = 5; /* number of unack'ed probes before declaring dead */
+ int keepalive = 1;
+ int ret = 0;
+
+ ret = kernel_setsockopt(sock, SOL_SOCKET, SO_KEEPALIVE,
+ (char *)&keepalive, sizeof(keepalive));
+ if (ret < 0)
+ goto bail;
+
+ ret = kernel_setsockopt(sock, IPPROTO_TCP, TCP_KEEPCNT,
+ (char *)&keepcnt, sizeof(keepcnt));
+ if (ret < 0)
+ goto bail;
+
+ ret = kernel_setsockopt(sock, IPPROTO_TCP, TCP_KEEPIDLE,
+ (char *)&keepidle, sizeof(keepidle));
+ if (ret < 0)
+ goto bail;
+
+ /* KEEPINTVL is the interval between successive probes. We follow
+ * the model in xs_tcp_finish_connecting() and re-use keepidle.
+ */
+ ret = kernel_setsockopt(sock, IPPROTO_TCP, TCP_KEEPINTVL,
+ (char *)&keepidle, sizeof(keepidle));
+bail:
+ return ret;
+}
+
static int rds_tcp_accept_one(struct socket *sock)
{
struct socket *new_sock = NULL;
struct rds_connection *conn;
int ret;
struct inet_sock *inet;
+ struct rds_tcp_connection *rs_tcp;
ret = sock_create_lite(sock->sk->sk_family, sock->sk->sk_type,
sock->sk->sk_protocol, &new_sock);
@@ -63,6 +96,10 @@ static int rds_tcp_accept_one(struct socket *sock)
if (ret < 0)
goto out;
+ ret = rds_tcp_keepalive(new_sock);
+ if (ret < 0)
+ goto out;
+
rds_tcp_tune(new_sock);
inet = inet_sk(new_sock->sk);
@@ -77,6 +114,15 @@ static int rds_tcp_accept_one(struct socket *sock)
ret = PTR_ERR(conn);
goto out;
}
+ /* An incoming SYN request came in, and TCP just accepted it.
+ * We always create a new conn for listen side of TCP, and do not
+ * add it to the c_hash_list.
+ *
+ * If the client reboots, this conn will need to be cleaned up.
+ * rds_tcp_state_change() will do that cleanup
+ */
+ rs_tcp = (struct rds_tcp_connection *)conn->c_transport_data;
+ WARN_ON(!rs_tcp || rs_tcp->t_sock);
/*
* see the comment above rds_queue_delayed_reconnect()
diff --git a/net/rose/af_rose.c b/net/rose/af_rose.c
index 8ae603069a1a..36dbc2da3661 100644
--- a/net/rose/af_rose.c
+++ b/net/rose/af_rose.c
@@ -520,7 +520,7 @@ static int rose_create(struct net *net, struct socket *sock, int protocol,
if (sock->type != SOCK_SEQPACKET || protocol != 0)
return -ESOCKTNOSUPPORT;
- sk = sk_alloc(net, PF_ROSE, GFP_ATOMIC, &rose_proto);
+ sk = sk_alloc(net, PF_ROSE, GFP_ATOMIC, &rose_proto, kern);
if (sk == NULL)
return -ENOMEM;
@@ -559,7 +559,7 @@ static struct sock *rose_make_new(struct sock *osk)
if (osk->sk_type != SOCK_SEQPACKET)
return NULL;
- sk = sk_alloc(sock_net(osk), PF_ROSE, GFP_ATOMIC, &rose_proto);
+ sk = sk_alloc(sock_net(osk), PF_ROSE, GFP_ATOMIC, &rose_proto, 0);
if (sk == NULL)
return NULL;
diff --git a/net/rxrpc/af_rxrpc.c b/net/rxrpc/af_rxrpc.c
index 0095b9a0b779..25d60ed15284 100644
--- a/net/rxrpc/af_rxrpc.c
+++ b/net/rxrpc/af_rxrpc.c
@@ -632,7 +632,7 @@ static int rxrpc_create(struct net *net, struct socket *sock, int protocol,
sock->ops = &rxrpc_rpc_ops;
sock->state = SS_UNCONNECTED;
- sk = sk_alloc(net, PF_RXRPC, GFP_KERNEL, &rxrpc_proto);
+ sk = sk_alloc(net, PF_RXRPC, GFP_KERNEL, &rxrpc_proto, kern);
if (!sk)
return -ENOMEM;
diff --git a/net/rxrpc/ar-local.c b/net/rxrpc/ar-local.c
index ca904ed5400a..78483b4602bf 100644
--- a/net/rxrpc/ar-local.c
+++ b/net/rxrpc/ar-local.c
@@ -73,8 +73,8 @@ static int rxrpc_create_local(struct rxrpc_local *local)
_enter("%p{%d}", local, local->srx.transport_type);
/* create a socket to represent the local endpoint */
- ret = sock_create_kern(PF_INET, local->srx.transport_type, IPPROTO_UDP,
- &local->socket);
+ ret = sock_create_kern(&init_net, PF_INET, local->srx.transport_type,
+ IPPROTO_UDP, &local->socket);
if (ret < 0) {
_leave(" = %d [socket]", ret);
return ret;
diff --git a/net/sched/Kconfig b/net/sched/Kconfig
index 2274e723a3df..daa33432b716 100644
--- a/net/sched/Kconfig
+++ b/net/sched/Kconfig
@@ -312,6 +312,7 @@ config NET_SCH_PIE
config NET_SCH_INGRESS
tristate "Ingress Qdisc"
depends on NET_CLS_ACT
+ select NET_INGRESS
---help---
Say Y here if you want to use classifiers for incoming packets.
If unsure, say Y.
@@ -477,6 +478,16 @@ config NET_CLS_BPF
To compile this code as a module, choose M here: the module will
be called cls_bpf.
+config NET_CLS_FLOWER
+ tristate "Flower classifier"
+ select NET_CLS
+ ---help---
+ If you say Y here, you will be able to classify packets based on
+ a configurable combination of packet keys and masks.
+
+ To compile this code as a module, choose M here: the module will
+ be called cls_flower.
+
config NET_EMATCH
bool "Extended Matches"
select NET_CLS
diff --git a/net/sched/Makefile b/net/sched/Makefile
index 7ca7f4c1b8c2..690c1689e090 100644
--- a/net/sched/Makefile
+++ b/net/sched/Makefile
@@ -56,6 +56,7 @@ obj-$(CONFIG_NET_CLS_BASIC) += cls_basic.o
obj-$(CONFIG_NET_CLS_FLOW) += cls_flow.o
obj-$(CONFIG_NET_CLS_CGROUP) += cls_cgroup.o
obj-$(CONFIG_NET_CLS_BPF) += cls_bpf.o
+obj-$(CONFIG_NET_CLS_FLOWER) += cls_flower.o
obj-$(CONFIG_NET_EMATCH) += ematch.o
obj-$(CONFIG_NET_EMATCH_CMP) += em_cmp.o
obj-$(CONFIG_NET_EMATCH_NBYTE) += em_nbyte.o
diff --git a/net/sched/act_api.c b/net/sched/act_api.c
index 3d43e4979f27..af427a3dbcba 100644
--- a/net/sched/act_api.c
+++ b/net/sched/act_api.c
@@ -392,11 +392,6 @@ int tcf_action_exec(struct sk_buff *skb, const struct list_head *actions,
list_for_each_entry(a, actions, list) {
repeat:
ret = a->ops->act(skb, a, res);
- if (TC_MUNGED & skb->tc_verd) {
- /* copied already, allow trampling */
- skb->tc_verd = SET_TC_OK2MUNGE(skb->tc_verd);
- skb->tc_verd = CLR_TC_MUNGED(skb->tc_verd);
- }
if (ret == TC_ACT_REPEAT)
goto repeat; /* we need a ttl - JHS */
if (ret != TC_ACT_PIPE)
diff --git a/net/sched/act_connmark.c b/net/sched/act_connmark.c
index 8e472518f9f6..295d14bd6c67 100644
--- a/net/sched/act_connmark.c
+++ b/net/sched/act_connmark.c
@@ -63,7 +63,6 @@ static int tcf_connmark(struct sk_buff *skb, const struct tc_action *a,
skb->mark = c->mark;
/* using overlimits stats to count how many packets marked */
ca->tcf_qstats.overlimits++;
- nf_ct_put(c);
goto out;
}
@@ -82,7 +81,6 @@ static int tcf_connmark(struct sk_buff *skb, const struct tc_action *a,
nf_ct_put(c);
out:
- skb->nfct = NULL;
spin_unlock(&ca->tcf_lock);
return ca->tcf_action;
}
diff --git a/net/sched/act_mirred.c b/net/sched/act_mirred.c
index 3f63ceac8e01..a42a3b257226 100644
--- a/net/sched/act_mirred.c
+++ b/net/sched/act_mirred.c
@@ -151,7 +151,7 @@ static int tcf_mirred(struct sk_buff *skb, const struct tc_action *a,
}
at = G_TC_AT(skb->tc_verd);
- skb2 = skb_act_clone(skb, GFP_ATOMIC, m->tcf_action);
+ skb2 = skb_clone(skb, GFP_ATOMIC);
if (skb2 == NULL)
goto out;
diff --git a/net/sched/act_pedit.c b/net/sched/act_pedit.c
index 59649d588d79..17e6d6669c7f 100644
--- a/net/sched/act_pedit.c
+++ b/net/sched/act_pedit.c
@@ -108,7 +108,7 @@ static int tcf_pedit(struct sk_buff *skb, const struct tc_action *a,
struct tcf_result *res)
{
struct tcf_pedit *p = a->priv;
- int i, munged = 0;
+ int i;
unsigned int off;
if (skb_unclone(skb, GFP_ATOMIC))
@@ -156,11 +156,8 @@ static int tcf_pedit(struct sk_buff *skb, const struct tc_action *a,
*ptr = ((*ptr & tkey->mask) ^ tkey->val);
if (ptr == &_data)
skb_store_bits(skb, off + offset, ptr, 4);
- munged++;
}
- if (munged)
- skb->tc_verd = SET_TC_MUNGED(skb->tc_verd);
goto done;
} else
WARN(1, "pedit BUG: index %d\n", p->tcf_index);
diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c
index 8b0470e418dc..a75864d93142 100644
--- a/net/sched/cls_api.c
+++ b/net/sched/cls_api.c
@@ -81,6 +81,11 @@ int unregister_tcf_proto_ops(struct tcf_proto_ops *ops)
struct tcf_proto_ops *t;
int rc = -ENOENT;
+ /* Wait for outstanding call_rcu()s, if any, from a
+ * tcf_proto_ops's destroy() handler.
+ */
+ rcu_barrier();
+
write_lock(&cls_mod_lock);
list_for_each_entry(t, &tcf_proto_base, head) {
if (t == ops) {
@@ -308,12 +313,11 @@ replay:
case RTM_DELTFILTER:
err = tp->ops->delete(tp, fh);
if (err == 0) {
- tfilter_notify(net, skb, n, tp, fh, RTM_DELTFILTER);
- if (tcf_destroy(tp, false)) {
- struct tcf_proto *next = rtnl_dereference(tp->next);
+ struct tcf_proto *next = rtnl_dereference(tp->next);
+ tfilter_notify(net, skb, n, tp, fh, RTM_DELTFILTER);
+ if (tcf_destroy(tp, false))
RCU_INIT_POINTER(*back, next);
- }
}
goto errout;
case RTM_GETTFILTER:
diff --git a/net/sched/cls_flow.c b/net/sched/cls_flow.c
index a620c4e288a5..b4359924846c 100644
--- a/net/sched/cls_flow.c
+++ b/net/sched/cls_flow.c
@@ -26,7 +26,7 @@
#include <net/pkt_cls.h>
#include <net/ip.h>
#include <net/route.h>
-#include <net/flow_keys.h>
+#include <net/flow_dissector.h>
#if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE)
#include <net/netfilter/nf_conntrack.h>
@@ -68,35 +68,35 @@ static inline u32 addr_fold(void *addr)
static u32 flow_get_src(const struct sk_buff *skb, const struct flow_keys *flow)
{
- if (flow->src)
- return ntohl(flow->src);
+ if (flow->addrs.src)
+ return ntohl(flow->addrs.src);
return addr_fold(skb->sk);
}
static u32 flow_get_dst(const struct sk_buff *skb, const struct flow_keys *flow)
{
- if (flow->dst)
- return ntohl(flow->dst);
+ if (flow->addrs.dst)
+ return ntohl(flow->addrs.dst);
return addr_fold(skb_dst(skb)) ^ (__force u16) tc_skb_protocol(skb);
}
static u32 flow_get_proto(const struct sk_buff *skb, const struct flow_keys *flow)
{
- return flow->ip_proto;
+ return flow->basic.ip_proto;
}
static u32 flow_get_proto_src(const struct sk_buff *skb, const struct flow_keys *flow)
{
- if (flow->ports)
- return ntohs(flow->port16[0]);
+ if (flow->ports.ports)
+ return ntohs(flow->ports.src);
return addr_fold(skb->sk);
}
static u32 flow_get_proto_dst(const struct sk_buff *skb, const struct flow_keys *flow)
{
- if (flow->ports)
- return ntohs(flow->port16[1]);
+ if (flow->ports.ports)
+ return ntohs(flow->ports.dst);
return addr_fold(skb_dst(skb)) ^ (__force u16) tc_skb_protocol(skb);
}
@@ -295,7 +295,7 @@ static int flow_classify(struct sk_buff *skb, const struct tcf_proto *tp,
keymask = f->keymask;
if (keymask & FLOW_KEYS_NEEDED)
- skb_flow_dissect(skb, &flow_keys);
+ skb_flow_dissect_flow_keys(skb, &flow_keys);
for (n = 0; n < f->nkeys; n++) {
key = ffs(keymask) - 1;
diff --git a/net/sched/cls_flower.c b/net/sched/cls_flower.c
new file mode 100644
index 000000000000..8c8f34ef6980
--- /dev/null
+++ b/net/sched/cls_flower.c
@@ -0,0 +1,688 @@
+/*
+ * net/sched/cls_flower.c Flower classifier
+ *
+ * Copyright (c) 2015 Jiri Pirko <jiri@resnulli.us>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ */
+
+#include <linux/kernel.h>
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/rhashtable.h>
+
+#include <linux/if_ether.h>
+#include <linux/in6.h>
+#include <linux/ip.h>
+
+#include <net/sch_generic.h>
+#include <net/pkt_cls.h>
+#include <net/ip.h>
+#include <net/flow_dissector.h>
+
+struct fl_flow_key {
+ int indev_ifindex;
+ struct flow_dissector_key_basic basic;
+ struct flow_dissector_key_eth_addrs eth;
+ union {
+ struct flow_dissector_key_addrs ipv4;
+ struct flow_dissector_key_ipv6_addrs ipv6;
+ };
+ struct flow_dissector_key_ports tp;
+} __aligned(BITS_PER_LONG / 8); /* Ensure that we can do comparisons as longs. */
+
+struct fl_flow_mask_range {
+ unsigned short int start;
+ unsigned short int end;
+};
+
+struct fl_flow_mask {
+ struct fl_flow_key key;
+ struct fl_flow_mask_range range;
+ struct rcu_head rcu;
+};
+
+struct cls_fl_head {
+ struct rhashtable ht;
+ struct fl_flow_mask mask;
+ struct flow_dissector dissector;
+ u32 hgen;
+ bool mask_assigned;
+ struct list_head filters;
+ struct rhashtable_params ht_params;
+ struct rcu_head rcu;
+};
+
+struct cls_fl_filter {
+ struct rhash_head ht_node;
+ struct fl_flow_key mkey;
+ struct tcf_exts exts;
+ struct tcf_result res;
+ struct fl_flow_key key;
+ struct list_head list;
+ u32 handle;
+ struct rcu_head rcu;
+};
+
+static unsigned short int fl_mask_range(const struct fl_flow_mask *mask)
+{
+ return mask->range.end - mask->range.start;
+}
+
+static void fl_mask_update_range(struct fl_flow_mask *mask)
+{
+ const u8 *bytes = (const u8 *) &mask->key;
+ size_t size = sizeof(mask->key);
+ size_t i, first = 0, last = size - 1;
+
+ for (i = 0; i < sizeof(mask->key); i++) {
+ if (bytes[i]) {
+ if (!first && i)
+ first = i;
+ last = i;
+ }
+ }
+ mask->range.start = rounddown(first, sizeof(long));
+ mask->range.end = roundup(last + 1, sizeof(long));
+}
+
+static void *fl_key_get_start(struct fl_flow_key *key,
+ const struct fl_flow_mask *mask)
+{
+ return (u8 *) key + mask->range.start;
+}
+
+static void fl_set_masked_key(struct fl_flow_key *mkey, struct fl_flow_key *key,
+ struct fl_flow_mask *mask)
+{
+ const long *lkey = fl_key_get_start(key, mask);
+ const long *lmask = fl_key_get_start(&mask->key, mask);
+ long *lmkey = fl_key_get_start(mkey, mask);
+ int i;
+
+ for (i = 0; i < fl_mask_range(mask); i += sizeof(long))
+ *lmkey++ = *lkey++ & *lmask++;
+}
+
+static void fl_clear_masked_range(struct fl_flow_key *key,
+ struct fl_flow_mask *mask)
+{
+ memset(fl_key_get_start(key, mask), 0, fl_mask_range(mask));
+}
+
+static int fl_classify(struct sk_buff *skb, const struct tcf_proto *tp,
+ struct tcf_result *res)
+{
+ struct cls_fl_head *head = rcu_dereference_bh(tp->root);
+ struct cls_fl_filter *f;
+ struct fl_flow_key skb_key;
+ struct fl_flow_key skb_mkey;
+
+ fl_clear_masked_range(&skb_key, &head->mask);
+ skb_key.indev_ifindex = skb->skb_iif;
+ /* skb_flow_dissect() does not set n_proto in case an unknown protocol,
+ * so do it rather here.
+ */
+ skb_key.basic.n_proto = skb->protocol;
+ skb_flow_dissect(skb, &head->dissector, &skb_key);
+
+ fl_set_masked_key(&skb_mkey, &skb_key, &head->mask);
+
+ f = rhashtable_lookup_fast(&head->ht,
+ fl_key_get_start(&skb_mkey, &head->mask),
+ head->ht_params);
+ if (f) {
+ *res = f->res;
+ return tcf_exts_exec(skb, &f->exts, res);
+ }
+ return -1;
+}
+
+static int fl_init(struct tcf_proto *tp)
+{
+ struct cls_fl_head *head;
+
+ head = kzalloc(sizeof(*head), GFP_KERNEL);
+ if (!head)
+ return -ENOBUFS;
+
+ INIT_LIST_HEAD_RCU(&head->filters);
+ rcu_assign_pointer(tp->root, head);
+
+ return 0;
+}
+
+static void fl_destroy_filter(struct rcu_head *head)
+{
+ struct cls_fl_filter *f = container_of(head, struct cls_fl_filter, rcu);
+
+ tcf_exts_destroy(&f->exts);
+ kfree(f);
+}
+
+static bool fl_destroy(struct tcf_proto *tp, bool force)
+{
+ struct cls_fl_head *head = rtnl_dereference(tp->root);
+ struct cls_fl_filter *f, *next;
+
+ if (!force && !list_empty(&head->filters))
+ return false;
+
+ list_for_each_entry_safe(f, next, &head->filters, list) {
+ list_del_rcu(&f->list);
+ call_rcu(&f->rcu, fl_destroy_filter);
+ }
+ RCU_INIT_POINTER(tp->root, NULL);
+ if (head->mask_assigned)
+ rhashtable_destroy(&head->ht);
+ kfree_rcu(head, rcu);
+ return true;
+}
+
+static unsigned long fl_get(struct tcf_proto *tp, u32 handle)
+{
+ struct cls_fl_head *head = rtnl_dereference(tp->root);
+ struct cls_fl_filter *f;
+
+ list_for_each_entry(f, &head->filters, list)
+ if (f->handle == handle)
+ return (unsigned long) f;
+ return 0;
+}
+
+static const struct nla_policy fl_policy[TCA_FLOWER_MAX + 1] = {
+ [TCA_FLOWER_UNSPEC] = { .type = NLA_UNSPEC },
+ [TCA_FLOWER_CLASSID] = { .type = NLA_U32 },
+ [TCA_FLOWER_INDEV] = { .type = NLA_STRING,
+ .len = IFNAMSIZ },
+ [TCA_FLOWER_KEY_ETH_DST] = { .len = ETH_ALEN },
+ [TCA_FLOWER_KEY_ETH_DST_MASK] = { .len = ETH_ALEN },
+ [TCA_FLOWER_KEY_ETH_SRC] = { .len = ETH_ALEN },
+ [TCA_FLOWER_KEY_ETH_SRC_MASK] = { .len = ETH_ALEN },
+ [TCA_FLOWER_KEY_ETH_TYPE] = { .type = NLA_U16 },
+ [TCA_FLOWER_KEY_IP_PROTO] = { .type = NLA_U8 },
+ [TCA_FLOWER_KEY_IPV4_SRC] = { .type = NLA_U32 },
+ [TCA_FLOWER_KEY_IPV4_SRC_MASK] = { .type = NLA_U32 },
+ [TCA_FLOWER_KEY_IPV4_DST] = { .type = NLA_U32 },
+ [TCA_FLOWER_KEY_IPV4_DST_MASK] = { .type = NLA_U32 },
+ [TCA_FLOWER_KEY_IPV6_SRC] = { .len = sizeof(struct in6_addr) },
+ [TCA_FLOWER_KEY_IPV6_SRC_MASK] = { .len = sizeof(struct in6_addr) },
+ [TCA_FLOWER_KEY_IPV6_DST] = { .len = sizeof(struct in6_addr) },
+ [TCA_FLOWER_KEY_IPV6_DST_MASK] = { .len = sizeof(struct in6_addr) },
+ [TCA_FLOWER_KEY_TCP_SRC] = { .type = NLA_U16 },
+ [TCA_FLOWER_KEY_TCP_DST] = { .type = NLA_U16 },
+ [TCA_FLOWER_KEY_TCP_SRC] = { .type = NLA_U16 },
+ [TCA_FLOWER_KEY_TCP_DST] = { .type = NLA_U16 },
+};
+
+static void fl_set_key_val(struct nlattr **tb,
+ void *val, int val_type,
+ void *mask, int mask_type, int len)
+{
+ if (!tb[val_type])
+ return;
+ memcpy(val, nla_data(tb[val_type]), len);
+ if (mask_type == TCA_FLOWER_UNSPEC || !tb[mask_type])
+ memset(mask, 0xff, len);
+ else
+ memcpy(mask, nla_data(tb[mask_type]), len);
+}
+
+static int fl_set_key(struct net *net, struct nlattr **tb,
+ struct fl_flow_key *key, struct fl_flow_key *mask)
+{
+#ifdef CONFIG_NET_CLS_IND
+ if (tb[TCA_FLOWER_INDEV]) {
+ int err = tcf_change_indev(net, tb[TCA_FLOWER_INDEV]);
+ if (err < 0)
+ return err;
+ key->indev_ifindex = err;
+ mask->indev_ifindex = 0xffffffff;
+ }
+#endif
+
+ fl_set_key_val(tb, key->eth.dst, TCA_FLOWER_KEY_ETH_DST,
+ mask->eth.dst, TCA_FLOWER_KEY_ETH_DST_MASK,
+ sizeof(key->eth.dst));
+ fl_set_key_val(tb, key->eth.src, TCA_FLOWER_KEY_ETH_SRC,
+ mask->eth.src, TCA_FLOWER_KEY_ETH_SRC_MASK,
+ sizeof(key->eth.src));
+ fl_set_key_val(tb, &key->basic.n_proto, TCA_FLOWER_KEY_ETH_TYPE,
+ &mask->basic.n_proto, TCA_FLOWER_UNSPEC,
+ sizeof(key->basic.n_proto));
+ if (key->basic.n_proto == htons(ETH_P_IP) ||
+ key->basic.n_proto == htons(ETH_P_IPV6)) {
+ fl_set_key_val(tb, &key->basic.ip_proto, TCA_FLOWER_KEY_IP_PROTO,
+ &mask->basic.ip_proto, TCA_FLOWER_UNSPEC,
+ sizeof(key->basic.ip_proto));
+ }
+ if (key->basic.n_proto == htons(ETH_P_IP)) {
+ fl_set_key_val(tb, &key->ipv4.src, TCA_FLOWER_KEY_IPV4_SRC,
+ &mask->ipv4.src, TCA_FLOWER_KEY_IPV4_SRC_MASK,
+ sizeof(key->ipv4.src));
+ fl_set_key_val(tb, &key->ipv4.dst, TCA_FLOWER_KEY_IPV4_DST,
+ &mask->ipv4.dst, TCA_FLOWER_KEY_IPV4_DST_MASK,
+ sizeof(key->ipv4.dst));
+ } else if (key->basic.n_proto == htons(ETH_P_IPV6)) {
+ fl_set_key_val(tb, &key->ipv6.src, TCA_FLOWER_KEY_IPV6_SRC,
+ &mask->ipv6.src, TCA_FLOWER_KEY_IPV6_SRC_MASK,
+ sizeof(key->ipv6.src));
+ fl_set_key_val(tb, &key->ipv6.dst, TCA_FLOWER_KEY_IPV6_DST,
+ &mask->ipv6.dst, TCA_FLOWER_KEY_IPV6_DST_MASK,
+ sizeof(key->ipv6.dst));
+ }
+ if (key->basic.ip_proto == IPPROTO_TCP) {
+ fl_set_key_val(tb, &key->tp.src, TCA_FLOWER_KEY_TCP_SRC,
+ &mask->tp.src, TCA_FLOWER_UNSPEC,
+ sizeof(key->tp.src));
+ fl_set_key_val(tb, &key->tp.dst, TCA_FLOWER_KEY_TCP_DST,
+ &mask->tp.dst, TCA_FLOWER_UNSPEC,
+ sizeof(key->tp.dst));
+ } else if (key->basic.ip_proto == IPPROTO_UDP) {
+ fl_set_key_val(tb, &key->tp.src, TCA_FLOWER_KEY_UDP_SRC,
+ &mask->tp.src, TCA_FLOWER_UNSPEC,
+ sizeof(key->tp.src));
+ fl_set_key_val(tb, &key->tp.dst, TCA_FLOWER_KEY_UDP_DST,
+ &mask->tp.dst, TCA_FLOWER_UNSPEC,
+ sizeof(key->tp.dst));
+ }
+
+ return 0;
+}
+
+static bool fl_mask_eq(struct fl_flow_mask *mask1,
+ struct fl_flow_mask *mask2)
+{
+ const long *lmask1 = fl_key_get_start(&mask1->key, mask1);
+ const long *lmask2 = fl_key_get_start(&mask2->key, mask2);
+
+ return !memcmp(&mask1->range, &mask2->range, sizeof(mask1->range)) &&
+ !memcmp(lmask1, lmask2, fl_mask_range(mask1));
+}
+
+static const struct rhashtable_params fl_ht_params = {
+ .key_offset = offsetof(struct cls_fl_filter, mkey), /* base offset */
+ .head_offset = offsetof(struct cls_fl_filter, ht_node),
+ .automatic_shrinking = true,
+};
+
+static int fl_init_hashtable(struct cls_fl_head *head,
+ struct fl_flow_mask *mask)
+{
+ head->ht_params = fl_ht_params;
+ head->ht_params.key_len = fl_mask_range(mask);
+ head->ht_params.key_offset += mask->range.start;
+
+ return rhashtable_init(&head->ht, &head->ht_params);
+}
+
+#define FL_KEY_MEMBER_OFFSET(member) offsetof(struct fl_flow_key, member)
+#define FL_KEY_MEMBER_SIZE(member) (sizeof(((struct fl_flow_key *) 0)->member))
+#define FL_KEY_MEMBER_END_OFFSET(member) \
+ (FL_KEY_MEMBER_OFFSET(member) + FL_KEY_MEMBER_SIZE(member))
+
+#define FL_KEY_IN_RANGE(mask, member) \
+ (FL_KEY_MEMBER_OFFSET(member) <= (mask)->range.end && \
+ FL_KEY_MEMBER_END_OFFSET(member) >= (mask)->range.start)
+
+#define FL_KEY_SET(keys, cnt, id, member) \
+ do { \
+ keys[cnt].key_id = id; \
+ keys[cnt].offset = FL_KEY_MEMBER_OFFSET(member); \
+ cnt++; \
+ } while(0);
+
+#define FL_KEY_SET_IF_IN_RANGE(mask, keys, cnt, id, member) \
+ do { \
+ if (FL_KEY_IN_RANGE(mask, member)) \
+ FL_KEY_SET(keys, cnt, id, member); \
+ } while(0);
+
+static void fl_init_dissector(struct cls_fl_head *head,
+ struct fl_flow_mask *mask)
+{
+ struct flow_dissector_key keys[FLOW_DISSECTOR_KEY_MAX];
+ size_t cnt = 0;
+
+ FL_KEY_SET(keys, cnt, FLOW_DISSECTOR_KEY_BASIC, basic);
+ FL_KEY_SET_IF_IN_RANGE(mask, keys, cnt,
+ FLOW_DISSECTOR_KEY_ETH_ADDRS, eth);
+ FL_KEY_SET_IF_IN_RANGE(mask, keys, cnt,
+ FLOW_DISSECTOR_KEY_IPV4_ADDRS, ipv4);
+ FL_KEY_SET_IF_IN_RANGE(mask, keys, cnt,
+ FLOW_DISSECTOR_KEY_IPV6_ADDRS, ipv6);
+ FL_KEY_SET_IF_IN_RANGE(mask, keys, cnt,
+ FLOW_DISSECTOR_KEY_PORTS, tp);
+
+ skb_flow_dissector_init(&head->dissector, keys, cnt);
+}
+
+static int fl_check_assign_mask(struct cls_fl_head *head,
+ struct fl_flow_mask *mask)
+{
+ int err;
+
+ if (head->mask_assigned) {
+ if (!fl_mask_eq(&head->mask, mask))
+ return -EINVAL;
+ else
+ return 0;
+ }
+
+ /* Mask is not assigned yet. So assign it and init hashtable
+ * according to that.
+ */
+ err = fl_init_hashtable(head, mask);
+ if (err)
+ return err;
+ memcpy(&head->mask, mask, sizeof(head->mask));
+ head->mask_assigned = true;
+
+ fl_init_dissector(head, mask);
+
+ return 0;
+}
+
+static int fl_set_parms(struct net *net, struct tcf_proto *tp,
+ struct cls_fl_filter *f, struct fl_flow_mask *mask,
+ unsigned long base, struct nlattr **tb,
+ struct nlattr *est, bool ovr)
+{
+ struct tcf_exts e;
+ int err;
+
+ tcf_exts_init(&e, TCA_FLOWER_ACT, 0);
+ err = tcf_exts_validate(net, tp, tb, est, &e, ovr);
+ if (err < 0)
+ return err;
+
+ if (tb[TCA_FLOWER_CLASSID]) {
+ f->res.classid = nla_get_u32(tb[TCA_FLOWER_CLASSID]);
+ tcf_bind_filter(tp, &f->res, base);
+ }
+
+ err = fl_set_key(net, tb, &f->key, &mask->key);
+ if (err)
+ goto errout;
+
+ fl_mask_update_range(mask);
+ fl_set_masked_key(&f->mkey, &f->key, mask);
+
+ tcf_exts_change(tp, &f->exts, &e);
+
+ return 0;
+errout:
+ tcf_exts_destroy(&e);
+ return err;
+}
+
+static u32 fl_grab_new_handle(struct tcf_proto *tp,
+ struct cls_fl_head *head)
+{
+ unsigned int i = 0x80000000;
+ u32 handle;
+
+ do {
+ if (++head->hgen == 0x7FFFFFFF)
+ head->hgen = 1;
+ } while (--i > 0 && fl_get(tp, head->hgen));
+
+ if (unlikely(i == 0)) {
+ pr_err("Insufficient number of handles\n");
+ handle = 0;
+ } else {
+ handle = head->hgen;
+ }
+
+ return handle;
+}
+
+static int fl_change(struct net *net, struct sk_buff *in_skb,
+ struct tcf_proto *tp, unsigned long base,
+ u32 handle, struct nlattr **tca,
+ unsigned long *arg, bool ovr)
+{
+ struct cls_fl_head *head = rtnl_dereference(tp->root);
+ struct cls_fl_filter *fold = (struct cls_fl_filter *) *arg;
+ struct cls_fl_filter *fnew;
+ struct nlattr *tb[TCA_FLOWER_MAX + 1];
+ struct fl_flow_mask mask = {};
+ int err;
+
+ if (!tca[TCA_OPTIONS])
+ return -EINVAL;
+
+ err = nla_parse_nested(tb, TCA_FLOWER_MAX, tca[TCA_OPTIONS], fl_policy);
+ if (err < 0)
+ return err;
+
+ if (fold && handle && fold->handle != handle)
+ return -EINVAL;
+
+ fnew = kzalloc(sizeof(*fnew), GFP_KERNEL);
+ if (!fnew)
+ return -ENOBUFS;
+
+ tcf_exts_init(&fnew->exts, TCA_FLOWER_ACT, 0);
+
+ if (!handle) {
+ handle = fl_grab_new_handle(tp, head);
+ if (!handle) {
+ err = -EINVAL;
+ goto errout;
+ }
+ }
+ fnew->handle = handle;
+
+ err = fl_set_parms(net, tp, fnew, &mask, base, tb, tca[TCA_RATE], ovr);
+ if (err)
+ goto errout;
+
+ err = fl_check_assign_mask(head, &mask);
+ if (err)
+ goto errout;
+
+ err = rhashtable_insert_fast(&head->ht, &fnew->ht_node,
+ head->ht_params);
+ if (err)
+ goto errout;
+ if (fold)
+ rhashtable_remove_fast(&head->ht, &fold->ht_node,
+ head->ht_params);
+
+ *arg = (unsigned long) fnew;
+
+ if (fold) {
+ list_replace_rcu(&fnew->list, &fold->list);
+ tcf_unbind_filter(tp, &fold->res);
+ call_rcu(&fold->rcu, fl_destroy_filter);
+ } else {
+ list_add_tail_rcu(&fnew->list, &head->filters);
+ }
+
+ return 0;
+
+errout:
+ kfree(fnew);
+ return err;
+}
+
+static int fl_delete(struct tcf_proto *tp, unsigned long arg)
+{
+ struct cls_fl_head *head = rtnl_dereference(tp->root);
+ struct cls_fl_filter *f = (struct cls_fl_filter *) arg;
+
+ rhashtable_remove_fast(&head->ht, &f->ht_node,
+ head->ht_params);
+ list_del_rcu(&f->list);
+ tcf_unbind_filter(tp, &f->res);
+ call_rcu(&f->rcu, fl_destroy_filter);
+ return 0;
+}
+
+static void fl_walk(struct tcf_proto *tp, struct tcf_walker *arg)
+{
+ struct cls_fl_head *head = rtnl_dereference(tp->root);
+ struct cls_fl_filter *f;
+
+ list_for_each_entry_rcu(f, &head->filters, list) {
+ if (arg->count < arg->skip)
+ goto skip;
+ if (arg->fn(tp, (unsigned long) f, arg) < 0) {
+ arg->stop = 1;
+ break;
+ }
+skip:
+ arg->count++;
+ }
+}
+
+static int fl_dump_key_val(struct sk_buff *skb,
+ void *val, int val_type,
+ void *mask, int mask_type, int len)
+{
+ int err;
+
+ if (!memchr_inv(mask, 0, len))
+ return 0;
+ err = nla_put(skb, val_type, len, val);
+ if (err)
+ return err;
+ if (mask_type != TCA_FLOWER_UNSPEC) {
+ err = nla_put(skb, mask_type, len, mask);
+ if (err)
+ return err;
+ }
+ return 0;
+}
+
+static int fl_dump(struct net *net, struct tcf_proto *tp, unsigned long fh,
+ struct sk_buff *skb, struct tcmsg *t)
+{
+ struct cls_fl_head *head = rtnl_dereference(tp->root);
+ struct cls_fl_filter *f = (struct cls_fl_filter *) fh;
+ struct nlattr *nest;
+ struct fl_flow_key *key, *mask;
+
+ if (!f)
+ return skb->len;
+
+ t->tcm_handle = f->handle;
+
+ nest = nla_nest_start(skb, TCA_OPTIONS);
+ if (!nest)
+ goto nla_put_failure;
+
+ if (f->res.classid &&
+ nla_put_u32(skb, TCA_FLOWER_CLASSID, f->res.classid))
+ goto nla_put_failure;
+
+ key = &f->key;
+ mask = &head->mask.key;
+
+ if (mask->indev_ifindex) {
+ struct net_device *dev;
+
+ dev = __dev_get_by_index(net, key->indev_ifindex);
+ if (dev && nla_put_string(skb, TCA_FLOWER_INDEV, dev->name))
+ goto nla_put_failure;
+ }
+
+ if (fl_dump_key_val(skb, key->eth.dst, TCA_FLOWER_KEY_ETH_DST,
+ mask->eth.dst, TCA_FLOWER_KEY_ETH_DST_MASK,
+ sizeof(key->eth.dst)) ||
+ fl_dump_key_val(skb, key->eth.src, TCA_FLOWER_KEY_ETH_SRC,
+ mask->eth.src, TCA_FLOWER_KEY_ETH_SRC_MASK,
+ sizeof(key->eth.src)) ||
+ fl_dump_key_val(skb, &key->basic.n_proto, TCA_FLOWER_KEY_ETH_TYPE,
+ &mask->basic.n_proto, TCA_FLOWER_UNSPEC,
+ sizeof(key->basic.n_proto)))
+ goto nla_put_failure;
+ if ((key->basic.n_proto == htons(ETH_P_IP) ||
+ key->basic.n_proto == htons(ETH_P_IPV6)) &&
+ fl_dump_key_val(skb, &key->basic.ip_proto, TCA_FLOWER_KEY_IP_PROTO,
+ &mask->basic.ip_proto, TCA_FLOWER_UNSPEC,
+ sizeof(key->basic.ip_proto)))
+ goto nla_put_failure;
+
+ if (key->basic.n_proto == htons(ETH_P_IP) &&
+ (fl_dump_key_val(skb, &key->ipv4.src, TCA_FLOWER_KEY_IPV4_SRC,
+ &mask->ipv4.src, TCA_FLOWER_KEY_IPV4_SRC_MASK,
+ sizeof(key->ipv4.src)) ||
+ fl_dump_key_val(skb, &key->ipv4.dst, TCA_FLOWER_KEY_IPV4_DST,
+ &mask->ipv4.dst, TCA_FLOWER_KEY_IPV4_DST_MASK,
+ sizeof(key->ipv4.dst))))
+ goto nla_put_failure;
+ else if (key->basic.n_proto == htons(ETH_P_IPV6) &&
+ (fl_dump_key_val(skb, &key->ipv6.src, TCA_FLOWER_KEY_IPV6_SRC,
+ &mask->ipv6.src, TCA_FLOWER_KEY_IPV6_SRC_MASK,
+ sizeof(key->ipv6.src)) ||
+ fl_dump_key_val(skb, &key->ipv6.dst, TCA_FLOWER_KEY_IPV6_DST,
+ &mask->ipv6.dst, TCA_FLOWER_KEY_IPV6_DST_MASK,
+ sizeof(key->ipv6.dst))))
+ goto nla_put_failure;
+
+ if (key->basic.ip_proto == IPPROTO_TCP &&
+ (fl_dump_key_val(skb, &key->tp.src, TCA_FLOWER_KEY_TCP_SRC,
+ &mask->tp.src, TCA_FLOWER_UNSPEC,
+ sizeof(key->tp.src)) ||
+ fl_dump_key_val(skb, &key->tp.dst, TCA_FLOWER_KEY_TCP_DST,
+ &mask->tp.dst, TCA_FLOWER_UNSPEC,
+ sizeof(key->tp.dst))))
+ goto nla_put_failure;
+ else if (key->basic.ip_proto == IPPROTO_UDP &&
+ (fl_dump_key_val(skb, &key->tp.src, TCA_FLOWER_KEY_UDP_SRC,
+ &mask->tp.src, TCA_FLOWER_UNSPEC,
+ sizeof(key->tp.src)) ||
+ fl_dump_key_val(skb, &key->tp.dst, TCA_FLOWER_KEY_UDP_DST,
+ &mask->tp.dst, TCA_FLOWER_UNSPEC,
+ sizeof(key->tp.dst))))
+ goto nla_put_failure;
+
+ if (tcf_exts_dump(skb, &f->exts))
+ goto nla_put_failure;
+
+ nla_nest_end(skb, nest);
+
+ if (tcf_exts_dump_stats(skb, &f->exts) < 0)
+ goto nla_put_failure;
+
+ return skb->len;
+
+nla_put_failure:
+ nla_nest_cancel(skb, nest);
+ return -1;
+}
+
+static struct tcf_proto_ops cls_fl_ops __read_mostly = {
+ .kind = "flower",
+ .classify = fl_classify,
+ .init = fl_init,
+ .destroy = fl_destroy,
+ .get = fl_get,
+ .change = fl_change,
+ .delete = fl_delete,
+ .walk = fl_walk,
+ .dump = fl_dump,
+ .owner = THIS_MODULE,
+};
+
+static int __init cls_fl_init(void)
+{
+ return register_tcf_proto_ops(&cls_fl_ops);
+}
+
+static void __exit cls_fl_exit(void)
+{
+ unregister_tcf_proto_ops(&cls_fl_ops);
+}
+
+module_init(cls_fl_init);
+module_exit(cls_fl_exit);
+
+MODULE_AUTHOR("Jiri Pirko <jiri@resnulli.us>");
+MODULE_DESCRIPTION("Flower classifier");
+MODULE_LICENSE("GPL v2");
diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c
index ad9eed70bc8f..0b74dc0ede9c 100644
--- a/net/sched/sch_api.c
+++ b/net/sched/sch_api.c
@@ -1816,13 +1816,8 @@ int tc_classify_compat(struct sk_buff *skb, const struct tcf_proto *tp,
continue;
err = tp->classify(skb, tp, res);
- if (err >= 0) {
-#ifdef CONFIG_NET_CLS_ACT
- if (err != TC_ACT_RECLASSIFY && skb->tc_verd)
- skb->tc_verd = SET_TC_VERD(skb->tc_verd, 0);
-#endif
+ if (err >= 0)
return err;
- }
}
return -1;
}
@@ -1834,23 +1829,22 @@ int tc_classify(struct sk_buff *skb, const struct tcf_proto *tp,
int err = 0;
#ifdef CONFIG_NET_CLS_ACT
const struct tcf_proto *otp = tp;
+ int limit = 0;
reclassify:
#endif
err = tc_classify_compat(skb, tp, res);
#ifdef CONFIG_NET_CLS_ACT
if (err == TC_ACT_RECLASSIFY) {
- u32 verd = G_TC_VERD(skb->tc_verd);
tp = otp;
- if (verd++ >= MAX_REC_LOOP) {
+ if (unlikely(limit++ >= MAX_REC_LOOP)) {
net_notice_ratelimited("%s: packet reclassify loop rule prio %u protocol %02x\n",
tp->q->ops->id,
tp->prio & 0xffff,
ntohs(tp->protocol));
return TC_ACT_SHOT;
}
- skb->tc_verd = SET_TC_VERD(skb->tc_verd, verd);
goto reclassify;
}
#endif
diff --git a/net/sched/sch_choke.c b/net/sched/sch_choke.c
index c009eb9045ce..93d5742dc7e0 100644
--- a/net/sched/sch_choke.c
+++ b/net/sched/sch_choke.c
@@ -18,7 +18,7 @@
#include <net/pkt_sched.h>
#include <net/inet_ecn.h>
#include <net/red.h>
-#include <net/flow_keys.h>
+#include <net/flow_dissector.h>
/*
CHOKe stateless AQM for fair bandwidth allocation
@@ -133,16 +133,10 @@ static void choke_drop_by_idx(struct Qdisc *sch, unsigned int idx)
--sch->q.qlen;
}
-/* private part of skb->cb[] that a qdisc is allowed to use
- * is limited to QDISC_CB_PRIV_LEN bytes.
- * As a flow key might be too large, we store a part of it only.
- */
-#define CHOKE_K_LEN min_t(u32, sizeof(struct flow_keys), QDISC_CB_PRIV_LEN - 3)
-
struct choke_skb_cb {
u16 classid;
u8 keys_valid;
- u8 keys[QDISC_CB_PRIV_LEN - 3];
+ struct flow_keys_digest keys;
};
static inline struct choke_skb_cb *choke_skb_cb(const struct sk_buff *skb)
@@ -176,19 +170,19 @@ static bool choke_match_flow(struct sk_buff *skb1,
if (!choke_skb_cb(skb1)->keys_valid) {
choke_skb_cb(skb1)->keys_valid = 1;
- skb_flow_dissect(skb1, &temp);
- memcpy(&choke_skb_cb(skb1)->keys, &temp, CHOKE_K_LEN);
+ skb_flow_dissect_flow_keys(skb1, &temp);
+ make_flow_keys_digest(&choke_skb_cb(skb1)->keys, &temp);
}
if (!choke_skb_cb(skb2)->keys_valid) {
choke_skb_cb(skb2)->keys_valid = 1;
- skb_flow_dissect(skb2, &temp);
- memcpy(&choke_skb_cb(skb2)->keys, &temp, CHOKE_K_LEN);
+ skb_flow_dissect_flow_keys(skb2, &temp);
+ make_flow_keys_digest(&choke_skb_cb(skb2)->keys, &temp);
}
return !memcmp(&choke_skb_cb(skb1)->keys,
&choke_skb_cb(skb2)->keys,
- CHOKE_K_LEN);
+ sizeof(choke_skb_cb(skb1)->keys));
}
/*
diff --git a/net/sched/sch_codel.c b/net/sched/sch_codel.c
index de28f8e968e8..535007d5f0b5 100644
--- a/net/sched/sch_codel.c
+++ b/net/sched/sch_codel.c
@@ -6,7 +6,7 @@
*
* Implemented on linux by :
* Copyright (C) 2012 Michael D. Taht <dave.taht@bufferbloat.net>
- * Copyright (C) 2012 Eric Dumazet <edumazet@google.com>
+ * Copyright (C) 2012,2015 Eric Dumazet <edumazet@google.com>
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
@@ -109,6 +109,7 @@ static const struct nla_policy codel_policy[TCA_CODEL_MAX + 1] = {
[TCA_CODEL_LIMIT] = { .type = NLA_U32 },
[TCA_CODEL_INTERVAL] = { .type = NLA_U32 },
[TCA_CODEL_ECN] = { .type = NLA_U32 },
+ [TCA_CODEL_CE_THRESHOLD]= { .type = NLA_U32 },
};
static int codel_change(struct Qdisc *sch, struct nlattr *opt)
@@ -133,6 +134,12 @@ static int codel_change(struct Qdisc *sch, struct nlattr *opt)
q->params.target = ((u64)target * NSEC_PER_USEC) >> CODEL_SHIFT;
}
+ if (tb[TCA_CODEL_CE_THRESHOLD]) {
+ u64 val = nla_get_u32(tb[TCA_CODEL_CE_THRESHOLD]);
+
+ q->params.ce_threshold = (val * NSEC_PER_USEC) >> CODEL_SHIFT;
+ }
+
if (tb[TCA_CODEL_INTERVAL]) {
u32 interval = nla_get_u32(tb[TCA_CODEL_INTERVAL]);
@@ -164,7 +171,7 @@ static int codel_init(struct Qdisc *sch, struct nlattr *opt)
sch->limit = DEFAULT_CODEL_LIMIT;
- codel_params_init(&q->params);
+ codel_params_init(&q->params, sch);
codel_vars_init(&q->vars);
codel_stats_init(&q->stats);
@@ -201,7 +208,10 @@ static int codel_dump(struct Qdisc *sch, struct sk_buff *skb)
nla_put_u32(skb, TCA_CODEL_ECN,
q->params.ecn))
goto nla_put_failure;
-
+ if (q->params.ce_threshold != CODEL_DISABLED_THRESHOLD &&
+ nla_put_u32(skb, TCA_CODEL_CE_THRESHOLD,
+ codel_time_to_us(q->params.ce_threshold)))
+ goto nla_put_failure;
return nla_nest_end(skb, opts);
nla_put_failure:
@@ -220,6 +230,7 @@ static int codel_dump_stats(struct Qdisc *sch, struct gnet_dump *d)
.ldelay = codel_time_to_us(q->vars.ldelay),
.dropping = q->vars.dropping,
.ecn_mark = q->stats.ecn_mark,
+ .ce_mark = q->stats.ce_mark,
};
if (q->vars.dropping) {
diff --git a/net/sched/sch_fq_codel.c b/net/sched/sch_fq_codel.c
index 1e52decb7b59..d75993f89fac 100644
--- a/net/sched/sch_fq_codel.c
+++ b/net/sched/sch_fq_codel.c
@@ -6,7 +6,7 @@
* as published by the Free Software Foundation; either version
* 2 of the License, or (at your option) any later version.
*
- * Copyright (C) 2012 Eric Dumazet <edumazet@google.com>
+ * Copyright (C) 2012,2015 Eric Dumazet <edumazet@google.com>
*/
#include <linux/module.h>
@@ -23,7 +23,6 @@
#include <linux/vmalloc.h>
#include <net/netlink.h>
#include <net/pkt_sched.h>
-#include <net/flow_keys.h>
#include <net/codel.h>
/* Fair Queue CoDel.
@@ -68,15 +67,9 @@ struct fq_codel_sched_data {
};
static unsigned int fq_codel_hash(const struct fq_codel_sched_data *q,
- const struct sk_buff *skb)
+ struct sk_buff *skb)
{
- struct flow_keys keys;
- unsigned int hash;
-
- skb_flow_dissect(skb, &keys);
- hash = jhash_3words((__force u32)keys.dst,
- (__force u32)keys.src ^ keys.ip_proto,
- (__force u32)keys.ports, q->perturbation);
+ u32 hash = skb_get_hash_perturb(skb, q->perturbation);
return reciprocal_scale(hash, q->flows_cnt);
}
@@ -299,6 +292,7 @@ static const struct nla_policy fq_codel_policy[TCA_FQ_CODEL_MAX + 1] = {
[TCA_FQ_CODEL_ECN] = { .type = NLA_U32 },
[TCA_FQ_CODEL_FLOWS] = { .type = NLA_U32 },
[TCA_FQ_CODEL_QUANTUM] = { .type = NLA_U32 },
+ [TCA_FQ_CODEL_CE_THRESHOLD] = { .type = NLA_U32 },
};
static int fq_codel_change(struct Qdisc *sch, struct nlattr *opt)
@@ -329,6 +323,12 @@ static int fq_codel_change(struct Qdisc *sch, struct nlattr *opt)
q->cparams.target = (target * NSEC_PER_USEC) >> CODEL_SHIFT;
}
+ if (tb[TCA_FQ_CODEL_CE_THRESHOLD]) {
+ u64 val = nla_get_u32(tb[TCA_FQ_CODEL_CE_THRESHOLD]);
+
+ q->cparams.ce_threshold = (val * NSEC_PER_USEC) >> CODEL_SHIFT;
+ }
+
if (tb[TCA_FQ_CODEL_INTERVAL]) {
u64 interval = nla_get_u32(tb[TCA_FQ_CODEL_INTERVAL]);
@@ -391,7 +391,7 @@ static int fq_codel_init(struct Qdisc *sch, struct nlattr *opt)
q->perturbation = prandom_u32();
INIT_LIST_HEAD(&q->new_flows);
INIT_LIST_HEAD(&q->old_flows);
- codel_params_init(&q->cparams);
+ codel_params_init(&q->cparams, sch);
codel_stats_init(&q->cstats);
q->cparams.ecn = true;
@@ -448,6 +448,11 @@ static int fq_codel_dump(struct Qdisc *sch, struct sk_buff *skb)
q->flows_cnt))
goto nla_put_failure;
+ if (q->cparams.ce_threshold != CODEL_DISABLED_THRESHOLD &&
+ nla_put_u32(skb, TCA_FQ_CODEL_CE_THRESHOLD,
+ codel_time_to_us(q->cparams.ce_threshold)))
+ goto nla_put_failure;
+
return nla_nest_end(skb, opts);
nla_put_failure:
@@ -466,6 +471,7 @@ static int fq_codel_dump_stats(struct Qdisc *sch, struct gnet_dump *d)
st.qdisc_stats.drop_overlimit = q->drop_overlimit;
st.qdisc_stats.ecn_mark = q->cstats.ecn_mark;
st.qdisc_stats.new_flow_count = q->new_flow_count;
+ st.qdisc_stats.ce_mark = q->cstats.ce_mark;
list_for_each(pos, &q->new_flows)
st.qdisc_stats.new_flows_len++;
diff --git a/net/sched/sch_gred.c b/net/sched/sch_gred.c
index a4ca4517cdc8..abb9f2fec28f 100644
--- a/net/sched/sch_gred.c
+++ b/net/sched/sch_gred.c
@@ -165,7 +165,8 @@ static int gred_enqueue(struct sk_buff *skb, struct Qdisc *sch)
* if no default DP has been configured. This
* allows for DP flows to be left untouched.
*/
- if (skb_queue_len(&sch->q) < qdisc_dev(sch)->tx_queue_len)
+ if (likely(sch->qstats.backlog + qdisc_pkt_len(skb) <=
+ sch->limit))
return qdisc_enqueue_tail(skb, sch);
else
goto drop;
@@ -229,7 +230,7 @@ static int gred_enqueue(struct sk_buff *skb, struct Qdisc *sch)
break;
}
- if (q->backlog + qdisc_pkt_len(skb) <= q->limit) {
+ if (gred_backlog(t, q, sch) + qdisc_pkt_len(skb) <= q->limit) {
q->backlog += qdisc_pkt_len(skb);
return qdisc_enqueue_tail(skb, sch);
}
@@ -397,7 +398,10 @@ static inline int gred_change_vq(struct Qdisc *sch, int dp,
q->DP = dp;
q->prio = prio;
- q->limit = ctl->limit;
+ if (ctl->limit > sch->limit)
+ q->limit = sch->limit;
+ else
+ q->limit = ctl->limit;
if (q->backlog == 0)
red_end_of_idle_period(&q->vars);
@@ -414,6 +418,7 @@ static const struct nla_policy gred_policy[TCA_GRED_MAX + 1] = {
[TCA_GRED_STAB] = { .len = 256 },
[TCA_GRED_DPS] = { .len = sizeof(struct tc_gred_sopt) },
[TCA_GRED_MAX_P] = { .type = NLA_U32 },
+ [TCA_GRED_LIMIT] = { .type = NLA_U32 },
};
static int gred_change(struct Qdisc *sch, struct nlattr *opt)
@@ -433,11 +438,15 @@ static int gred_change(struct Qdisc *sch, struct nlattr *opt)
if (err < 0)
return err;
- if (tb[TCA_GRED_PARMS] == NULL && tb[TCA_GRED_STAB] == NULL)
+ if (tb[TCA_GRED_PARMS] == NULL && tb[TCA_GRED_STAB] == NULL) {
+ if (tb[TCA_GRED_LIMIT] != NULL)
+ sch->limit = nla_get_u32(tb[TCA_GRED_LIMIT]);
return gred_change_table_def(sch, opt);
+ }
if (tb[TCA_GRED_PARMS] == NULL ||
- tb[TCA_GRED_STAB] == NULL)
+ tb[TCA_GRED_STAB] == NULL ||
+ tb[TCA_GRED_LIMIT] != NULL)
return -EINVAL;
max_P = tb[TCA_GRED_MAX_P] ? nla_get_u32(tb[TCA_GRED_MAX_P]) : 0;
@@ -501,6 +510,14 @@ static int gred_init(struct Qdisc *sch, struct nlattr *opt)
if (tb[TCA_GRED_PARMS] || tb[TCA_GRED_STAB])
return -EINVAL;
+ if (tb[TCA_GRED_LIMIT])
+ sch->limit = nla_get_u32(tb[TCA_GRED_LIMIT]);
+ else {
+ u32 qlen = qdisc_dev(sch)->tx_queue_len ? : 1;
+
+ sch->limit = qlen * psched_mtu(qdisc_dev(sch));
+ }
+
return gred_change_table_def(sch, tb[TCA_GRED_DPS]);
}
@@ -531,6 +548,9 @@ static int gred_dump(struct Qdisc *sch, struct sk_buff *skb)
if (nla_put(skb, TCA_GRED_MAX_P, sizeof(max_p), max_p))
goto nla_put_failure;
+ if (nla_put_u32(skb, TCA_GRED_LIMIT, sch->limit))
+ goto nla_put_failure;
+
parms = nla_nest_start(skb, TCA_GRED_PARMS);
if (parms == NULL)
goto nla_put_failure;
@@ -553,7 +573,7 @@ static int gred_dump(struct Qdisc *sch, struct sk_buff *skb)
opt.limit = q->limit;
opt.DP = q->DP;
- opt.backlog = q->backlog;
+ opt.backlog = gred_backlog(table, q, sch);
opt.prio = q->prio;
opt.qth_min = q->parms.qth_min >> q->parms.Wlog;
opt.qth_max = q->parms.qth_max >> q->parms.Wlog;
diff --git a/net/sched/sch_hhf.c b/net/sched/sch_hhf.c
index 15d3aabfe250..9d15cb6b8cb1 100644
--- a/net/sched/sch_hhf.c
+++ b/net/sched/sch_hhf.c
@@ -9,7 +9,6 @@
#include <linux/module.h>
#include <linux/skbuff.h>
#include <linux/vmalloc.h>
-#include <net/flow_keys.h>
#include <net/pkt_sched.h>
#include <net/sock.h>
@@ -176,22 +175,6 @@ static u32 hhf_time_stamp(void)
return jiffies;
}
-static unsigned int skb_hash(const struct hhf_sched_data *q,
- const struct sk_buff *skb)
-{
- struct flow_keys keys;
- unsigned int hash;
-
- if (skb->sk && skb->sk->sk_hash)
- return skb->sk->sk_hash;
-
- skb_flow_dissect(skb, &keys);
- hash = jhash_3words((__force u32)keys.dst,
- (__force u32)keys.src ^ keys.ip_proto,
- (__force u32)keys.ports, q->perturbation);
- return hash;
-}
-
/* Looks up a heavy-hitter flow in a chaining list of table T. */
static struct hh_flow_state *seek_list(const u32 hash,
struct list_head *head,
@@ -280,7 +263,7 @@ static enum wdrr_bucket_idx hhf_classify(struct sk_buff *skb, struct Qdisc *sch)
}
/* Get hashed flow-id of the skb. */
- hash = skb_hash(q, skb);
+ hash = skb_get_hash_perturb(skb, q->perturbation);
/* Check if this packet belongs to an already established HH flow. */
flow_pos = hash & HHF_BIT_MASK;
diff --git a/net/sched/sch_ingress.c b/net/sched/sch_ingress.c
index 4cdbfb85686a..e7c648fa9dc3 100644
--- a/net/sched/sch_ingress.c
+++ b/net/sched/sch_ingress.c
@@ -12,16 +12,10 @@
#include <linux/list.h>
#include <linux/skbuff.h>
#include <linux/rtnetlink.h>
+
#include <net/netlink.h>
#include <net/pkt_sched.h>
-
-struct ingress_qdisc_data {
- struct tcf_proto __rcu *filter_list;
-};
-
-/* ------------------------- Class/flow operations ------------------------- */
-
static struct Qdisc *ingress_leaf(struct Qdisc *sch, unsigned long arg)
{
return NULL;
@@ -49,57 +43,24 @@ static void ingress_walk(struct Qdisc *sch, struct qdisc_walker *walker)
static struct tcf_proto __rcu **ingress_find_tcf(struct Qdisc *sch,
unsigned long cl)
{
- struct ingress_qdisc_data *p = qdisc_priv(sch);
-
- return &p->filter_list;
-}
-
-/* --------------------------- Qdisc operations ---------------------------- */
+ struct net_device *dev = qdisc_dev(sch);
-static int ingress_enqueue(struct sk_buff *skb, struct Qdisc *sch)
-{
- struct ingress_qdisc_data *p = qdisc_priv(sch);
- struct tcf_result res;
- struct tcf_proto *fl = rcu_dereference_bh(p->filter_list);
- int result;
-
- result = tc_classify(skb, fl, &res);
-
- qdisc_bstats_update(sch, skb);
- switch (result) {
- case TC_ACT_SHOT:
- result = TC_ACT_SHOT;
- qdisc_qstats_drop(sch);
- break;
- case TC_ACT_STOLEN:
- case TC_ACT_QUEUED:
- result = TC_ACT_STOLEN;
- break;
- case TC_ACT_RECLASSIFY:
- case TC_ACT_OK:
- skb->tc_index = TC_H_MIN(res.classid);
- default:
- result = TC_ACT_OK;
- break;
- }
-
- return result;
+ return &dev->ingress_cl_list;
}
-/* ------------------------------------------------------------- */
-
static int ingress_init(struct Qdisc *sch, struct nlattr *opt)
{
net_inc_ingress_queue();
+ sch->flags |= TCQ_F_CPUSTATS;
return 0;
}
static void ingress_destroy(struct Qdisc *sch)
{
- struct ingress_qdisc_data *p = qdisc_priv(sch);
+ struct net_device *dev = qdisc_dev(sch);
- tcf_destroy_chain(&p->filter_list);
+ tcf_destroy_chain(&dev->ingress_cl_list);
net_dec_ingress_queue();
}
@@ -110,6 +71,7 @@ static int ingress_dump(struct Qdisc *sch, struct sk_buff *skb)
nest = nla_nest_start(skb, TCA_OPTIONS);
if (nest == NULL)
goto nla_put_failure;
+
return nla_nest_end(skb, nest);
nla_put_failure:
@@ -130,8 +92,6 @@ static const struct Qdisc_class_ops ingress_class_ops = {
static struct Qdisc_ops ingress_qdisc_ops __read_mostly = {
.cl_ops = &ingress_class_ops,
.id = "ingress",
- .priv_size = sizeof(struct ingress_qdisc_data),
- .enqueue = ingress_enqueue,
.init = ingress_init,
.destroy = ingress_destroy,
.dump = ingress_dump,
@@ -148,6 +108,7 @@ static void __exit ingress_module_exit(void)
unregister_qdisc(&ingress_qdisc_ops);
}
-module_init(ingress_module_init)
-module_exit(ingress_module_exit)
+module_init(ingress_module_init);
+module_exit(ingress_module_exit);
+
MODULE_LICENSE("GPL");
diff --git a/net/sched/sch_netem.c b/net/sched/sch_netem.c
index 956ead2cab9a..5abd1d9de989 100644
--- a/net/sched/sch_netem.c
+++ b/net/sched/sch_netem.c
@@ -440,9 +440,9 @@ static int netem_enqueue(struct sk_buff *skb, struct Qdisc *sch)
if (count > 1 && (skb2 = skb_clone(skb, GFP_ATOMIC)) != NULL) {
struct Qdisc *rootq = qdisc_root(sch);
u32 dupsave = q->duplicate; /* prevent duplicating a dup... */
- q->duplicate = 0;
- qdisc_enqueue_root(skb2, rootq);
+ q->duplicate = 0;
+ rootq->enqueue(skb2, rootq);
q->duplicate = dupsave;
}
diff --git a/net/sched/sch_sfb.c b/net/sched/sch_sfb.c
index 5819dd82630d..4b815193326c 100644
--- a/net/sched/sch_sfb.c
+++ b/net/sched/sch_sfb.c
@@ -26,7 +26,6 @@
#include <net/ip.h>
#include <net/pkt_sched.h>
#include <net/inet_ecn.h>
-#include <net/flow_keys.h>
/*
* SFB uses two B[l][n] : L x N arrays of bins (L levels, N bins per level)
@@ -285,9 +284,9 @@ static int sfb_enqueue(struct sk_buff *skb, struct Qdisc *sch)
int i;
u32 p_min = ~0;
u32 minqlen = ~0;
- u32 r, slot, salt, sfbhash;
+ u32 r, sfbhash;
+ u32 slot = q->slot;
int ret = NET_XMIT_SUCCESS | __NET_XMIT_BYPASS;
- struct flow_keys keys;
if (unlikely(sch->q.qlen >= q->limit)) {
qdisc_qstats_overlimit(sch);
@@ -309,22 +308,17 @@ static int sfb_enqueue(struct sk_buff *skb, struct Qdisc *sch)
fl = rcu_dereference_bh(q->filter_list);
if (fl) {
+ u32 salt;
+
/* If using external classifiers, get result and record it. */
if (!sfb_classify(skb, fl, &ret, &salt))
goto other_drop;
- keys.src = salt;
- keys.dst = 0;
- keys.ports = 0;
+ sfbhash = jhash_1word(salt, q->bins[slot].perturbation);
} else {
- skb_flow_dissect(skb, &keys);
+ sfbhash = skb_get_hash_perturb(skb, q->bins[slot].perturbation);
}
- slot = q->slot;
- sfbhash = jhash_3words((__force u32)keys.dst,
- (__force u32)keys.src,
- (__force u32)keys.ports,
- q->bins[slot].perturbation);
if (!sfbhash)
sfbhash = 1;
sfb_skb_cb(skb)->hashes[slot] = sfbhash;
@@ -356,10 +350,8 @@ static int sfb_enqueue(struct sk_buff *skb, struct Qdisc *sch)
if (unlikely(p_min >= SFB_MAX_PROB)) {
/* Inelastic flow */
if (q->double_buffering) {
- sfbhash = jhash_3words((__force u32)keys.dst,
- (__force u32)keys.src,
- (__force u32)keys.ports,
- q->bins[slot].perturbation);
+ sfbhash = skb_get_hash_perturb(skb,
+ q->bins[slot].perturbation);
if (!sfbhash)
sfbhash = 1;
sfb_skb_cb(skb)->hashes[slot] = sfbhash;
diff --git a/net/sched/sch_sfq.c b/net/sched/sch_sfq.c
index b877140beda5..7d1492663360 100644
--- a/net/sched/sch_sfq.c
+++ b/net/sched/sch_sfq.c
@@ -23,7 +23,6 @@
#include <linux/vmalloc.h>
#include <net/netlink.h>
#include <net/pkt_sched.h>
-#include <net/flow_keys.h>
#include <net/red.h>
@@ -156,30 +155,10 @@ static inline struct sfq_head *sfq_dep_head(struct sfq_sched_data *q, sfq_index
return &q->dep[val - SFQ_MAX_FLOWS];
}
-/*
- * In order to be able to quickly rehash our queue when timer changes
- * q->perturbation, we store flow_keys in skb->cb[]
- */
-struct sfq_skb_cb {
- struct flow_keys keys;
-};
-
-static inline struct sfq_skb_cb *sfq_skb_cb(const struct sk_buff *skb)
-{
- qdisc_cb_private_validate(skb, sizeof(struct sfq_skb_cb));
- return (struct sfq_skb_cb *)qdisc_skb_cb(skb)->data;
-}
-
static unsigned int sfq_hash(const struct sfq_sched_data *q,
const struct sk_buff *skb)
{
- const struct flow_keys *keys = &sfq_skb_cb(skb)->keys;
- unsigned int hash;
-
- hash = jhash_3words((__force u32)keys->dst,
- (__force u32)keys->src ^ keys->ip_proto,
- (__force u32)keys->ports, q->perturbation);
- return hash & (q->divisor - 1);
+ return skb_get_hash_perturb(skb, q->perturbation) & (q->divisor - 1);
}
static unsigned int sfq_classify(struct sk_buff *skb, struct Qdisc *sch,
@@ -196,10 +175,8 @@ static unsigned int sfq_classify(struct sk_buff *skb, struct Qdisc *sch,
return TC_H_MIN(skb->priority);
fl = rcu_dereference_bh(q->filter_list);
- if (!fl) {
- skb_flow_dissect(skb, &sfq_skb_cb(skb)->keys);
+ if (!fl)
return sfq_hash(q, skb) + 1;
- }
*qerr = NET_XMIT_SUCCESS | __NET_XMIT_BYPASS;
result = tc_classify(skb, fl, &res);
diff --git a/net/sctp/ipv6.c b/net/sctp/ipv6.c
index 0e4198ee2370..e703ff7fed40 100644
--- a/net/sctp/ipv6.c
+++ b/net/sctp/ipv6.c
@@ -635,7 +635,7 @@ static struct sock *sctp_v6_create_accept_sk(struct sock *sk,
struct ipv6_pinfo *newnp, *np = inet6_sk(sk);
struct sctp6_sock *newsctp6sk;
- newsk = sk_alloc(sock_net(sk), PF_INET6, GFP_KERNEL, sk->sk_prot);
+ newsk = sk_alloc(sock_net(sk), PF_INET6, GFP_KERNEL, sk->sk_prot, 0);
if (!newsk)
goto out;
diff --git a/net/sctp/protocol.c b/net/sctp/protocol.c
index 53b7acde9aa3..59e80356672b 100644
--- a/net/sctp/protocol.c
+++ b/net/sctp/protocol.c
@@ -550,7 +550,7 @@ static struct sock *sctp_v4_create_accept_sk(struct sock *sk,
struct sctp_association *asoc)
{
struct sock *newsk = sk_alloc(sock_net(sk), PF_INET, GFP_KERNEL,
- sk->sk_prot);
+ sk->sk_prot, 0);
struct inet_sock *newinet;
if (!newsk)
diff --git a/net/socket.c b/net/socket.c
index 884e32997698..9963a0b53a64 100644
--- a/net/socket.c
+++ b/net/socket.c
@@ -576,9 +576,6 @@ void sock_release(struct socket *sock)
if (rcu_dereference_protected(sock->wq, 1)->fasync_list)
pr_err("%s: fasync list not empty!\n", __func__);
- if (test_bit(SOCK_EXTERNALLY_ALLOCATED, &sock->flags))
- return;
-
this_cpu_sub(sockets_in_use, 1);
if (!sock->file) {
iput(SOCK_INODE(sock));
@@ -1213,9 +1210,9 @@ int sock_create(int family, int type, int protocol, struct socket **res)
}
EXPORT_SYMBOL(sock_create);
-int sock_create_kern(int family, int type, int protocol, struct socket **res)
+int sock_create_kern(struct net *net, int family, int type, int protocol, struct socket **res)
{
- return __sock_create(&init_net, family, type, protocol, res, 1);
+ return __sock_create(net, family, type, protocol, res, 1);
}
EXPORT_SYMBOL(sock_create_kern);
diff --git a/net/sunrpc/auth_gss/gss_rpc_xdr.c b/net/sunrpc/auth_gss/gss_rpc_xdr.c
index 1ec19f6f0c2b..eeeba5adee6d 100644
--- a/net/sunrpc/auth_gss/gss_rpc_xdr.c
+++ b/net/sunrpc/auth_gss/gss_rpc_xdr.c
@@ -793,20 +793,26 @@ int gssx_dec_accept_sec_context(struct rpc_rqst *rqstp,
{
u32 value_follows;
int err;
+ struct page *scratch;
+
+ scratch = alloc_page(GFP_KERNEL);
+ if (!scratch)
+ return -ENOMEM;
+ xdr_set_scratch_buffer(xdr, page_address(scratch), PAGE_SIZE);
/* res->status */
err = gssx_dec_status(xdr, &res->status);
if (err)
- return err;
+ goto out_free;
/* res->context_handle */
err = gssx_dec_bool(xdr, &value_follows);
if (err)
- return err;
+ goto out_free;
if (value_follows) {
err = gssx_dec_ctx(xdr, res->context_handle);
if (err)
- return err;
+ goto out_free;
} else {
res->context_handle = NULL;
}
@@ -814,11 +820,11 @@ int gssx_dec_accept_sec_context(struct rpc_rqst *rqstp,
/* res->output_token */
err = gssx_dec_bool(xdr, &value_follows);
if (err)
- return err;
+ goto out_free;
if (value_follows) {
err = gssx_dec_buffer(xdr, res->output_token);
if (err)
- return err;
+ goto out_free;
} else {
res->output_token = NULL;
}
@@ -826,14 +832,17 @@ int gssx_dec_accept_sec_context(struct rpc_rqst *rqstp,
/* res->delegated_cred_handle */
err = gssx_dec_bool(xdr, &value_follows);
if (err)
- return err;
+ goto out_free;
if (value_follows) {
/* we do not support upcall servers sending this data. */
- return -EINVAL;
+ err = -EINVAL;
+ goto out_free;
}
/* res->options */
err = gssx_dec_option_array(xdr, &res->options);
+out_free:
+ __free_page(scratch);
return err;
}
diff --git a/net/switchdev/switchdev.c b/net/switchdev/switchdev.c
index 46568b85c333..ac853acbe211 100644
--- a/net/switchdev/switchdev.c
+++ b/net/switchdev/switchdev.c
@@ -15,97 +15,359 @@
#include <linux/mutex.h>
#include <linux/notifier.h>
#include <linux/netdevice.h>
+#include <linux/if_bridge.h>
#include <net/ip_fib.h>
#include <net/switchdev.h>
/**
- * netdev_switch_parent_id_get - Get ID of a switch
+ * switchdev_port_attr_get - Get port attribute
+ *
+ * @dev: port device
+ * @attr: attribute to get
+ */
+int switchdev_port_attr_get(struct net_device *dev, struct switchdev_attr *attr)
+{
+ const struct switchdev_ops *ops = dev->switchdev_ops;
+ struct net_device *lower_dev;
+ struct list_head *iter;
+ struct switchdev_attr first = {
+ .id = SWITCHDEV_ATTR_UNDEFINED
+ };
+ int err = -EOPNOTSUPP;
+
+ if (ops && ops->switchdev_port_attr_get)
+ return ops->switchdev_port_attr_get(dev, attr);
+
+ if (attr->flags & SWITCHDEV_F_NO_RECURSE)
+ return err;
+
+ /* Switch device port(s) may be stacked under
+ * bond/team/vlan dev, so recurse down to get attr on
+ * each port. Return -ENODATA if attr values don't
+ * compare across ports.
+ */
+
+ netdev_for_each_lower_dev(dev, lower_dev, iter) {
+ err = switchdev_port_attr_get(lower_dev, attr);
+ if (err)
+ break;
+ if (first.id == SWITCHDEV_ATTR_UNDEFINED)
+ first = *attr;
+ else if (memcmp(&first, attr, sizeof(*attr)))
+ return -ENODATA;
+ }
+
+ return err;
+}
+EXPORT_SYMBOL_GPL(switchdev_port_attr_get);
+
+static int __switchdev_port_attr_set(struct net_device *dev,
+ struct switchdev_attr *attr)
+{
+ const struct switchdev_ops *ops = dev->switchdev_ops;
+ struct net_device *lower_dev;
+ struct list_head *iter;
+ int err = -EOPNOTSUPP;
+
+ if (ops && ops->switchdev_port_attr_set)
+ return ops->switchdev_port_attr_set(dev, attr);
+
+ if (attr->flags & SWITCHDEV_F_NO_RECURSE)
+ return err;
+
+ /* Switch device port(s) may be stacked under
+ * bond/team/vlan dev, so recurse down to set attr on
+ * each port.
+ */
+
+ netdev_for_each_lower_dev(dev, lower_dev, iter) {
+ err = __switchdev_port_attr_set(lower_dev, attr);
+ if (err)
+ break;
+ }
+
+ return err;
+}
+
+struct switchdev_attr_set_work {
+ struct work_struct work;
+ struct net_device *dev;
+ struct switchdev_attr attr;
+};
+
+static void switchdev_port_attr_set_work(struct work_struct *work)
+{
+ struct switchdev_attr_set_work *asw =
+ container_of(work, struct switchdev_attr_set_work, work);
+ int err;
+
+ rtnl_lock();
+ err = switchdev_port_attr_set(asw->dev, &asw->attr);
+ BUG_ON(err);
+ rtnl_unlock();
+
+ dev_put(asw->dev);
+ kfree(work);
+}
+
+static int switchdev_port_attr_set_defer(struct net_device *dev,
+ struct switchdev_attr *attr)
+{
+ struct switchdev_attr_set_work *asw;
+
+ asw = kmalloc(sizeof(*asw), GFP_ATOMIC);
+ if (!asw)
+ return -ENOMEM;
+
+ INIT_WORK(&asw->work, switchdev_port_attr_set_work);
+
+ dev_hold(dev);
+ asw->dev = dev;
+ memcpy(&asw->attr, attr, sizeof(asw->attr));
+
+ schedule_work(&asw->work);
+
+ return 0;
+}
+
+/**
+ * switchdev_port_attr_set - Set port attribute
+ *
* @dev: port device
- * @psid: switch ID
+ * @attr: attribute to set
*
- * Get ID of a switch this port is part of.
+ * Use a 2-phase prepare-commit transaction model to ensure
+ * system is not left in a partially updated state due to
+ * failure from driver/device.
*/
-int netdev_switch_parent_id_get(struct net_device *dev,
- struct netdev_phys_item_id *psid)
+int switchdev_port_attr_set(struct net_device *dev, struct switchdev_attr *attr)
{
- const struct swdev_ops *ops = dev->swdev_ops;
+ int err;
+
+ if (!rtnl_is_locked()) {
+ /* Running prepare-commit transaction across stacked
+ * devices requires nothing moves, so if rtnl_lock is
+ * not held, schedule a worker thread to hold rtnl_lock
+ * while setting attr.
+ */
+
+ return switchdev_port_attr_set_defer(dev, attr);
+ }
+
+ /* Phase I: prepare for attr set. Driver/device should fail
+ * here if there are going to be issues in the commit phase,
+ * such as lack of resources or support. The driver/device
+ * should reserve resources needed for the commit phase here,
+ * but should not commit the attr.
+ */
+
+ attr->trans = SWITCHDEV_TRANS_PREPARE;
+ err = __switchdev_port_attr_set(dev, attr);
+ if (err) {
+ /* Prepare phase failed: abort the transaction. Any
+ * resources reserved in the prepare phase are
+ * released.
+ */
+
+ attr->trans = SWITCHDEV_TRANS_ABORT;
+ __switchdev_port_attr_set(dev, attr);
+
+ return err;
+ }
+
+ /* Phase II: commit attr set. This cannot fail as a fault
+ * of driver/device. If it does, it's a bug in the driver/device
+ * because the driver said everythings was OK in phase I.
+ */
+
+ attr->trans = SWITCHDEV_TRANS_COMMIT;
+ err = __switchdev_port_attr_set(dev, attr);
+ BUG_ON(err);
+
+ return err;
+}
+EXPORT_SYMBOL_GPL(switchdev_port_attr_set);
+
+static int __switchdev_port_obj_add(struct net_device *dev,
+ struct switchdev_obj *obj)
+{
+ const struct switchdev_ops *ops = dev->switchdev_ops;
+ struct net_device *lower_dev;
+ struct list_head *iter;
+ int err = -EOPNOTSUPP;
+
+ if (ops && ops->switchdev_port_obj_add)
+ return ops->switchdev_port_obj_add(dev, obj);
+
+ /* Switch device port(s) may be stacked under
+ * bond/team/vlan dev, so recurse down to add object on
+ * each port.
+ */
+
+ netdev_for_each_lower_dev(dev, lower_dev, iter) {
+ err = __switchdev_port_obj_add(lower_dev, obj);
+ if (err)
+ break;
+ }
+
+ return err;
+}
+
+/**
+ * switchdev_port_obj_add - Add port object
+ *
+ * @dev: port device
+ * @obj: object to add
+ *
+ * Use a 2-phase prepare-commit transaction model to ensure
+ * system is not left in a partially updated state due to
+ * failure from driver/device.
+ *
+ * rtnl_lock must be held.
+ */
+int switchdev_port_obj_add(struct net_device *dev, struct switchdev_obj *obj)
+{
+ int err;
- if (!ops || !ops->swdev_parent_id_get)
- return -EOPNOTSUPP;
- return ops->swdev_parent_id_get(dev, psid);
+ ASSERT_RTNL();
+
+ /* Phase I: prepare for obj add. Driver/device should fail
+ * here if there are going to be issues in the commit phase,
+ * such as lack of resources or support. The driver/device
+ * should reserve resources needed for the commit phase here,
+ * but should not commit the obj.
+ */
+
+ obj->trans = SWITCHDEV_TRANS_PREPARE;
+ err = __switchdev_port_obj_add(dev, obj);
+ if (err) {
+ /* Prepare phase failed: abort the transaction. Any
+ * resources reserved in the prepare phase are
+ * released.
+ */
+
+ obj->trans = SWITCHDEV_TRANS_ABORT;
+ __switchdev_port_obj_add(dev, obj);
+
+ return err;
+ }
+
+ /* Phase II: commit obj add. This cannot fail as a fault
+ * of driver/device. If it does, it's a bug in the driver/device
+ * because the driver said everythings was OK in phase I.
+ */
+
+ obj->trans = SWITCHDEV_TRANS_COMMIT;
+ err = __switchdev_port_obj_add(dev, obj);
+ WARN(err, "%s: Commit of object (id=%d) failed.\n", dev->name, obj->id);
+
+ return err;
}
-EXPORT_SYMBOL_GPL(netdev_switch_parent_id_get);
+EXPORT_SYMBOL_GPL(switchdev_port_obj_add);
/**
- * netdev_switch_port_stp_update - Notify switch device port of STP
- * state change
+ * switchdev_port_obj_del - Delete port object
+ *
* @dev: port device
- * @state: port STP state
+ * @obj: object to delete
+ */
+int switchdev_port_obj_del(struct net_device *dev, struct switchdev_obj *obj)
+{
+ const struct switchdev_ops *ops = dev->switchdev_ops;
+ struct net_device *lower_dev;
+ struct list_head *iter;
+ int err = -EOPNOTSUPP;
+
+ if (ops && ops->switchdev_port_obj_del)
+ return ops->switchdev_port_obj_del(dev, obj);
+
+ /* Switch device port(s) may be stacked under
+ * bond/team/vlan dev, so recurse down to delete object on
+ * each port.
+ */
+
+ netdev_for_each_lower_dev(dev, lower_dev, iter) {
+ err = switchdev_port_obj_del(lower_dev, obj);
+ if (err)
+ break;
+ }
+
+ return err;
+}
+EXPORT_SYMBOL_GPL(switchdev_port_obj_del);
+
+/**
+ * switchdev_port_obj_dump - Dump port objects
*
- * Notify switch device port of bridge port STP state change.
+ * @dev: port device
+ * @obj: object to dump
*/
-int netdev_switch_port_stp_update(struct net_device *dev, u8 state)
+int switchdev_port_obj_dump(struct net_device *dev, struct switchdev_obj *obj)
{
- const struct swdev_ops *ops = dev->swdev_ops;
+ const struct switchdev_ops *ops = dev->switchdev_ops;
struct net_device *lower_dev;
struct list_head *iter;
int err = -EOPNOTSUPP;
- if (ops && ops->swdev_port_stp_update)
- return ops->swdev_port_stp_update(dev, state);
+ if (ops && ops->switchdev_port_obj_dump)
+ return ops->switchdev_port_obj_dump(dev, obj);
+
+ /* Switch device port(s) may be stacked under
+ * bond/team/vlan dev, so recurse down to dump objects on
+ * first port at bottom of stack.
+ */
netdev_for_each_lower_dev(dev, lower_dev, iter) {
- err = netdev_switch_port_stp_update(lower_dev, state);
- if (err && err != -EOPNOTSUPP)
- return err;
+ err = switchdev_port_obj_dump(lower_dev, obj);
+ break;
}
return err;
}
-EXPORT_SYMBOL_GPL(netdev_switch_port_stp_update);
+EXPORT_SYMBOL_GPL(switchdev_port_obj_dump);
-static DEFINE_MUTEX(netdev_switch_mutex);
-static RAW_NOTIFIER_HEAD(netdev_switch_notif_chain);
+static DEFINE_MUTEX(switchdev_mutex);
+static RAW_NOTIFIER_HEAD(switchdev_notif_chain);
/**
- * register_netdev_switch_notifier - Register notifier
+ * register_switchdev_notifier - Register notifier
* @nb: notifier_block
*
* Register switch device notifier. This should be used by code
* which needs to monitor events happening in particular device.
* Return values are same as for atomic_notifier_chain_register().
*/
-int register_netdev_switch_notifier(struct notifier_block *nb)
+int register_switchdev_notifier(struct notifier_block *nb)
{
int err;
- mutex_lock(&netdev_switch_mutex);
- err = raw_notifier_chain_register(&netdev_switch_notif_chain, nb);
- mutex_unlock(&netdev_switch_mutex);
+ mutex_lock(&switchdev_mutex);
+ err = raw_notifier_chain_register(&switchdev_notif_chain, nb);
+ mutex_unlock(&switchdev_mutex);
return err;
}
-EXPORT_SYMBOL_GPL(register_netdev_switch_notifier);
+EXPORT_SYMBOL_GPL(register_switchdev_notifier);
/**
- * unregister_netdev_switch_notifier - Unregister notifier
+ * unregister_switchdev_notifier - Unregister notifier
* @nb: notifier_block
*
* Unregister switch device notifier.
* Return values are same as for atomic_notifier_chain_unregister().
*/
-int unregister_netdev_switch_notifier(struct notifier_block *nb)
+int unregister_switchdev_notifier(struct notifier_block *nb)
{
int err;
- mutex_lock(&netdev_switch_mutex);
- err = raw_notifier_chain_unregister(&netdev_switch_notif_chain, nb);
- mutex_unlock(&netdev_switch_mutex);
+ mutex_lock(&switchdev_mutex);
+ err = raw_notifier_chain_unregister(&switchdev_notif_chain, nb);
+ mutex_unlock(&switchdev_mutex);
return err;
}
-EXPORT_SYMBOL_GPL(unregister_netdev_switch_notifier);
+EXPORT_SYMBOL_GPL(unregister_switchdev_notifier);
/**
- * call_netdev_switch_notifiers - Call notifiers
+ * call_switchdev_notifiers - Call notifiers
* @val: value passed unmodified to notifier function
* @dev: port device
* @info: notifier information data
@@ -114,146 +376,387 @@ EXPORT_SYMBOL_GPL(unregister_netdev_switch_notifier);
* when it needs to propagate hardware event.
* Return values are same as for atomic_notifier_call_chain().
*/
-int call_netdev_switch_notifiers(unsigned long val, struct net_device *dev,
- struct netdev_switch_notifier_info *info)
+int call_switchdev_notifiers(unsigned long val, struct net_device *dev,
+ struct switchdev_notifier_info *info)
{
int err;
info->dev = dev;
- mutex_lock(&netdev_switch_mutex);
- err = raw_notifier_call_chain(&netdev_switch_notif_chain, val, info);
- mutex_unlock(&netdev_switch_mutex);
+ mutex_lock(&switchdev_mutex);
+ err = raw_notifier_call_chain(&switchdev_notif_chain, val, info);
+ mutex_unlock(&switchdev_mutex);
return err;
}
-EXPORT_SYMBOL_GPL(call_netdev_switch_notifiers);
+EXPORT_SYMBOL_GPL(call_switchdev_notifiers);
/**
- * netdev_switch_port_bridge_setlink - Notify switch device port of bridge
- * port attributes
+ * switchdev_port_bridge_getlink - Get bridge port attributes
*
* @dev: port device
- * @nlh: netlink msg with bridge port attributes
- * @flags: bridge setlink flags
*
- * Notify switch device port of bridge port attributes
+ * Called for SELF on rtnl_bridge_getlink to get bridge port
+ * attributes.
*/
-int netdev_switch_port_bridge_setlink(struct net_device *dev,
- struct nlmsghdr *nlh, u16 flags)
+int switchdev_port_bridge_getlink(struct sk_buff *skb, u32 pid, u32 seq,
+ struct net_device *dev, u32 filter_mask,
+ int nlflags)
{
- const struct net_device_ops *ops = dev->netdev_ops;
+ struct switchdev_attr attr = {
+ .id = SWITCHDEV_ATTR_PORT_BRIDGE_FLAGS,
+ };
+ u16 mode = BRIDGE_MODE_UNDEF;
+ u32 mask = BR_LEARNING | BR_LEARNING_SYNC;
+ int err;
- if (!(dev->features & NETIF_F_HW_SWITCH_OFFLOAD))
- return 0;
+ err = switchdev_port_attr_get(dev, &attr);
+ if (err)
+ return err;
+
+ return ndo_dflt_bridge_getlink(skb, pid, seq, dev, mode,
+ attr.u.brport_flags, mask, nlflags);
+}
+EXPORT_SYMBOL_GPL(switchdev_port_bridge_getlink);
+
+static int switchdev_port_br_setflag(struct net_device *dev,
+ struct nlattr *nlattr,
+ unsigned long brport_flag)
+{
+ struct switchdev_attr attr = {
+ .id = SWITCHDEV_ATTR_PORT_BRIDGE_FLAGS,
+ };
+ u8 flag = nla_get_u8(nlattr);
+ int err;
+
+ err = switchdev_port_attr_get(dev, &attr);
+ if (err)
+ return err;
+
+ if (flag)
+ attr.u.brport_flags |= brport_flag;
+ else
+ attr.u.brport_flags &= ~brport_flag;
+
+ return switchdev_port_attr_set(dev, &attr);
+}
+
+static const struct nla_policy
+switchdev_port_bridge_policy[IFLA_BRPORT_MAX + 1] = {
+ [IFLA_BRPORT_STATE] = { .type = NLA_U8 },
+ [IFLA_BRPORT_COST] = { .type = NLA_U32 },
+ [IFLA_BRPORT_PRIORITY] = { .type = NLA_U16 },
+ [IFLA_BRPORT_MODE] = { .type = NLA_U8 },
+ [IFLA_BRPORT_GUARD] = { .type = NLA_U8 },
+ [IFLA_BRPORT_PROTECT] = { .type = NLA_U8 },
+ [IFLA_BRPORT_FAST_LEAVE] = { .type = NLA_U8 },
+ [IFLA_BRPORT_LEARNING] = { .type = NLA_U8 },
+ [IFLA_BRPORT_LEARNING_SYNC] = { .type = NLA_U8 },
+ [IFLA_BRPORT_UNICAST_FLOOD] = { .type = NLA_U8 },
+};
+
+static int switchdev_port_br_setlink_protinfo(struct net_device *dev,
+ struct nlattr *protinfo)
+{
+ struct nlattr *attr;
+ int rem;
+ int err;
+
+ err = nla_validate_nested(protinfo, IFLA_BRPORT_MAX,
+ switchdev_port_bridge_policy);
+ if (err)
+ return err;
+
+ nla_for_each_nested(attr, protinfo, rem) {
+ switch (nla_type(attr)) {
+ case IFLA_BRPORT_LEARNING:
+ err = switchdev_port_br_setflag(dev, attr,
+ BR_LEARNING);
+ break;
+ case IFLA_BRPORT_LEARNING_SYNC:
+ err = switchdev_port_br_setflag(dev, attr,
+ BR_LEARNING_SYNC);
+ break;
+ default:
+ err = -EOPNOTSUPP;
+ break;
+ }
+ if (err)
+ return err;
+ }
+
+ return 0;
+}
+
+static int switchdev_port_br_afspec(struct net_device *dev,
+ struct nlattr *afspec,
+ int (*f)(struct net_device *dev,
+ struct switchdev_obj *obj))
+{
+ struct nlattr *attr;
+ struct bridge_vlan_info *vinfo;
+ struct switchdev_obj obj = {
+ .id = SWITCHDEV_OBJ_PORT_VLAN,
+ };
+ struct switchdev_obj_vlan *vlan = &obj.u.vlan;
+ int rem;
+ int err;
- if (!ops->ndo_bridge_setlink)
- return -EOPNOTSUPP;
+ nla_for_each_nested(attr, afspec, rem) {
+ if (nla_type(attr) != IFLA_BRIDGE_VLAN_INFO)
+ continue;
+ if (nla_len(attr) != sizeof(struct bridge_vlan_info))
+ return -EINVAL;
+ vinfo = nla_data(attr);
+ vlan->flags = vinfo->flags;
+ if (vinfo->flags & BRIDGE_VLAN_INFO_RANGE_BEGIN) {
+ if (vlan->vid_start)
+ return -EINVAL;
+ vlan->vid_start = vinfo->vid;
+ } else if (vinfo->flags & BRIDGE_VLAN_INFO_RANGE_END) {
+ if (!vlan->vid_start)
+ return -EINVAL;
+ vlan->vid_end = vinfo->vid;
+ if (vlan->vid_end <= vlan->vid_start)
+ return -EINVAL;
+ err = f(dev, &obj);
+ if (err)
+ return err;
+ memset(vlan, 0, sizeof(*vlan));
+ } else {
+ if (vlan->vid_start)
+ return -EINVAL;
+ vlan->vid_start = vinfo->vid;
+ vlan->vid_end = vinfo->vid;
+ err = f(dev, &obj);
+ if (err)
+ return err;
+ memset(vlan, 0, sizeof(*vlan));
+ }
+ }
- return ops->ndo_bridge_setlink(dev, nlh, flags);
+ return 0;
}
-EXPORT_SYMBOL_GPL(netdev_switch_port_bridge_setlink);
/**
- * netdev_switch_port_bridge_dellink - Notify switch device port of bridge
- * port attribute delete
+ * switchdev_port_bridge_setlink - Set bridge port attributes
*
* @dev: port device
- * @nlh: netlink msg with bridge port attributes
- * @flags: bridge setlink flags
+ * @nlh: netlink header
+ * @flags: netlink flags
*
- * Notify switch device port of bridge port attribute delete
+ * Called for SELF on rtnl_bridge_setlink to set bridge port
+ * attributes.
*/
-int netdev_switch_port_bridge_dellink(struct net_device *dev,
- struct nlmsghdr *nlh, u16 flags)
+int switchdev_port_bridge_setlink(struct net_device *dev,
+ struct nlmsghdr *nlh, u16 flags)
{
- const struct net_device_ops *ops = dev->netdev_ops;
+ struct nlattr *protinfo;
+ struct nlattr *afspec;
+ int err = 0;
- if (!(dev->features &a