aboutsummaryrefslogtreecommitdiffstats
path: root/net
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2018-12-27 13:04:52 -0800
committerLinus Torvalds <torvalds@linux-foundation.org>2018-12-27 13:04:52 -0800
commite0c38a4d1f196a4b17d2eba36afff8f656a4f1de (patch)
treeb26a69fabef0160adb127416a9744217700feeb7 /net
parent7f9f852c75e7d776b078813586c76a2bc7dca993 (diff)
parent90cadbbf341dd5b2df991c33a6bd6341f3a53788 (diff)
Merge git://git.kernel.org/pub/scm/linux/kernel/git/davem/net-next
Pull networking updates from David Miller: 1) New ipset extensions for matching on destination MAC addresses, from Stefano Brivio. 2) Add ipv4 ttl and tos, plus ipv6 flow label and hop limit offloads to nfp driver. From Stefano Brivio. 3) Implement GRO for plain UDP sockets, from Paolo Abeni. 4) Lots of work from Michał Mirosław to eliminate the VLAN_TAG_PRESENT bit so that we could support the entire vlan_tci value. 5) Rework the IPSEC policy lookups to better optimize more usecases, from Florian Westphal. 6) Infrastructure changes eliminating direct manipulation of SKB lists wherever possible, and to always use the appropriate SKB list helpers. This work is still ongoing... 7) Lots of PHY driver and state machine improvements and simplifications, from Heiner Kallweit. 8) Various TSO deferral refinements, from Eric Dumazet. 9) Add ntuple filter support to aquantia driver, from Dmitry Bogdanov. 10) Batch dropping of XDP packets in tuntap, from Jason Wang. 11) Lots of cleanups and improvements to the r8169 driver from Heiner Kallweit, including support for ->xmit_more. This driver has been getting some much needed love since he started working on it. 12) Lots of new forwarding selftests from Petr Machata. 13) Enable VXLAN learning in mlxsw driver, from Ido Schimmel. 14) Packed ring support for virtio, from Tiwei Bie. 15) Add new Aquantia AQtion USB driver, from Dmitry Bezrukov. 16) Add XDP support to dpaa2-eth driver, from Ioana Ciocoi Radulescu. 17) Implement coalescing on TCP backlog queue, from Eric Dumazet. 18) Implement carrier change in tun driver, from Nicolas Dichtel. 19) Support msg_zerocopy in UDP, from Willem de Bruijn. 20) Significantly improve garbage collection of neighbor objects when the table has many PERMANENT entries, from David Ahern. 21) Remove egdev usage from nfp and mlx5, and remove the facility completely from the tree as it no longer has any users. From Oz Shlomo and others. 22) Add a NETDEV_PRE_CHANGEADDR so that drivers can veto the change and therefore abort the operation before the commit phase (which is the NETDEV_CHANGEADDR event). From Petr Machata. 23) Add indirect call wrappers to avoid retpoline overhead, and use them in the GRO code paths. From Paolo Abeni. 24) Add support for netlink FDB get operations, from Roopa Prabhu. 25) Support bloom filter in mlxsw driver, from Nir Dotan. 26) Add SKB extension infrastructure. This consolidates the handling of the auxiliary SKB data used by IPSEC and bridge netfilter, and is designed to support the needs to MPTCP which could be integrated in the future. 27) Lots of XDP TX optimizations in mlx5 from Tariq Toukan. * git://git.kernel.org/pub/scm/linux/kernel/git/davem/net-next: (1845 commits) net: dccp: fix kernel crash on module load drivers/net: appletalk/cops: remove redundant if statement and mask bnx2x: Fix NULL pointer dereference in bnx2x_del_all_vlans() on some hw net/net_namespace: Check the return value of register_pernet_subsys() net/netlink_compat: Fix a missing check of nla_parse_nested ieee802154: lowpan_header_create check must check daddr net/mlx4_core: drop useless LIST_HEAD mlxsw: spectrum: drop useless LIST_HEAD net/mlx5e: drop useless LIST_HEAD iptunnel: Set tun_flags in the iptunnel_metadata_reply from src net/mlx5e: fix semicolon.cocci warnings staging: octeon: fix build failure with XFRM enabled net: Revert recent Spectre-v1 patches. can: af_can: Fix Spectre v1 vulnerability packet: validate address length if non-zero nfc: af_nfc: Fix Spectre v1 vulnerability phonet: af_phonet: Fix Spectre v1 vulnerability net: core: Fix Spectre v1 vulnerability net: minor cleanup in skb_ext_add() net: drop the unused helper skb_ext_get() ...
Diffstat (limited to 'net')
-rw-r--r--net/6lowpan/debugfs.c13
-rw-r--r--net/8021q/vlan.c101
-rw-r--r--net/8021q/vlan.h12
-rw-r--r--net/8021q/vlan_core.c128
-rw-r--r--net/8021q/vlan_dev.c2
-rw-r--r--net/Kconfig4
-rw-r--r--net/batman-adv/Kconfig10
-rw-r--r--net/batman-adv/bat_iv_ogm.c25
-rw-r--r--net/batman-adv/bat_v.c26
-rw-r--r--net/batman-adv/bridge_loop_avoidance.c82
-rw-r--r--net/batman-adv/debugfs.c2
-rw-r--r--net/batman-adv/distributed-arp-table.c42
-rw-r--r--net/batman-adv/gateway_client.c3
-rw-r--r--net/batman-adv/hard-interface.c3
-rw-r--r--net/batman-adv/hash.c2
-rw-r--r--net/batman-adv/hash.h6
-rw-r--r--net/batman-adv/log.c60
-rw-r--r--net/batman-adv/main.c3
-rw-r--r--net/batman-adv/main.h3
-rw-r--r--net/batman-adv/multicast.c51
-rw-r--r--net/batman-adv/netlink.c24
-rw-r--r--net/batman-adv/trace.c2
-rw-r--r--net/batman-adv/trace.h6
-rw-r--r--net/batman-adv/translation-table.c41
-rw-r--r--net/batman-adv/types.h5
-rw-r--r--net/bluetooth/6lowpan.c2
-rw-r--r--net/bluetooth/hci_event.c6
-rw-r--r--net/bluetooth/hci_request.c2
-rw-r--r--net/bluetooth/l2cap_core.c12
-rw-r--r--net/bluetooth/rfcomm/core.c12
-rw-r--r--net/bluetooth/rfcomm/sock.c12
-rw-r--r--net/bluetooth/sco.c12
-rw-r--r--net/bpf/test_run.c15
-rw-r--r--net/bridge/br.c89
-rw-r--r--net/bridge/br_device.c11
-rw-r--r--net/bridge/br_fdb.c46
-rw-r--r--net/bridge/br_if.c23
-rw-r--r--net/bridge/br_input.c4
-rw-r--r--net/bridge/br_mdb.c126
-rw-r--r--net/bridge/br_multicast.c442
-rw-r--r--net/bridge/br_netfilter_hooks.c54
-rw-r--r--net/bridge/br_netfilter_ipv6.c4
-rw-r--r--net/bridge/br_netlink.c71
-rw-r--r--net/bridge/br_private.h81
-rw-r--r--net/bridge/br_switchdev.c5
-rw-r--r--net/bridge/br_sysfs_br.c36
-rw-r--r--net/bridge/br_sysfs_if.c3
-rw-r--r--net/bridge/br_vlan.c71
-rw-r--r--net/core/datagram.c45
-rw-r--r--net/core/dev.c152
-rw-r--r--net/core/dev_addr_lists.c100
-rw-r--r--net/core/dev_ioctl.c4
-rw-r--r--net/core/devlink.c5
-rw-r--r--net/core/filter.c455
-rw-r--r--net/core/flow_dissector.c3
-rw-r--r--net/core/neighbour.c448
-rw-r--r--net/core/net-sysfs.c2
-rw-r--r--net/core/net_namespace.c162
-rw-r--r--net/core/netpoll.c2
-rw-r--r--net/core/rtnetlink.c517
-rw-r--r--net/core/skbuff.c313
-rw-r--r--net/core/skmsg.c23
-rw-r--r--net/core/sock.c14
-rw-r--r--net/core/sock_reuseport.c1
-rw-r--r--net/core/stream.c2
-rw-r--r--net/dccp/ipv4.c13
-rw-r--r--net/dccp/ipv6.c13
-rw-r--r--net/dccp/proto.c7
-rw-r--r--net/decnet/af_decnet.c2
-rw-r--r--net/dsa/Kconfig4
-rw-r--r--net/dsa/dsa.c8
-rw-r--r--net/dsa/dsa_priv.h2
-rw-r--r--net/dsa/master.c29
-rw-r--r--net/dsa/port.c3
-rw-r--r--net/dsa/slave.c58
-rw-r--r--net/dsa/tag_brcm.c2
-rw-r--r--net/dsa/tag_dsa.c1
-rw-r--r--net/dsa/tag_edsa.c1
-rw-r--r--net/dsa/tag_gswip.c1
-rw-r--r--net/dsa/tag_ksz.c117
-rw-r--r--net/dsa/tag_lan9303.c1
-rw-r--r--net/dsa/tag_mtk.c1
-rw-r--r--net/dsa/tag_qca.c1
-rw-r--r--net/dsa/tag_trailer.c1
-rw-r--r--net/ethernet/eth.c56
-rw-r--r--net/ieee802154/6lowpan/tx.c3
-rw-r--r--net/ieee802154/nl-phy.c2
-rw-r--r--net/ipv4/af_inet.c17
-rw-r--r--net/ipv4/devinet.c2
-rw-r--r--net/ipv4/esp4.c9
-rw-r--r--net/ipv4/esp4_offload.c15
-rw-r--r--net/ipv4/fib_semantics.c2
-rw-r--r--net/ipv4/fou.c75
-rw-r--r--net/ipv4/gre_demux.c9
-rw-r--r--net/ipv4/icmp.c6
-rw-r--r--net/ipv4/inet_connection_sock.c14
-rw-r--r--net/ipv4/inet_hashtables.c117
-rw-r--r--net/ipv4/ip_forward.c7
-rw-r--r--net/ipv4/ip_gre.c56
-rw-r--r--net/ipv4/ip_input.c73
-rw-r--r--net/ipv4/ip_output.c39
-rw-r--r--net/ipv4/ip_tunnel_core.c3
-rw-r--r--net/ipv4/ipconfig.c19
-rw-r--r--net/ipv4/ipip.c14
-rw-r--r--net/ipv4/ipmr.c15
-rw-r--r--net/ipv4/metrics.c26
-rw-r--r--net/ipv4/netfilter/Kconfig5
-rw-r--r--net/ipv4/netfilter/Makefile5
-rw-r--r--net/ipv4/netfilter/ipt_CLUSTERIP.c184
-rw-r--r--net/ipv4/netfilter/nf_nat_l3proto_ipv4.c43
-rw-r--r--net/ipv4/netfilter/nf_nat_pptp.c2
-rw-r--r--net/ipv4/netfilter/nf_nat_proto_gre.c150
-rw-r--r--net/ipv4/netfilter/nf_nat_proto_icmp.c83
-rw-r--r--net/ipv4/netfilter/nf_reject_ipv4.c6
-rw-r--r--net/ipv4/proc.c1
-rw-r--r--net/ipv4/protocol.c1
-rw-r--r--net/ipv4/raw.c31
-rw-r--r--net/ipv4/route.c3
-rw-r--r--net/ipv4/sysctl_net_ipv4.c11
-rw-r--r--net/ipv4/tcp.c17
-rw-r--r--net/ipv4/tcp_bbr.c15
-rw-r--r--net/ipv4/tcp_bpf.c32
-rw-r--r--net/ipv4/tcp_input.c78
-rw-r--r--net/ipv4/tcp_ipv4.c132
-rw-r--r--net/ipv4/tcp_offload.c6
-rw-r--r--net/ipv4/tcp_output.c36
-rw-r--r--net/ipv4/tunnel4.c18
-rw-r--r--net/ipv4/udp.c266
-rw-r--r--net/ipv4/udp_impl.h2
-rw-r--r--net/ipv4/udp_offload.c122
-rw-r--r--net/ipv4/udp_tunnel.c18
-rw-r--r--net/ipv4/udplite.c4
-rw-r--r--net/ipv4/xfrm4_protocol.c18
-rw-r--r--net/ipv6/addrconf.c2
-rw-r--r--net/ipv6/anycast.c6
-rw-r--r--net/ipv6/datagram.c10
-rw-r--r--net/ipv6/esp6.c9
-rw-r--r--net/ipv6/esp6_offload.c15
-rw-r--r--net/ipv6/fou6.c74
-rw-r--r--net/ipv6/icmp.c4
-rw-r--r--net/ipv6/inet6_hashtables.c58
-rw-r--r--net/ipv6/ip6_gre.c24
-rw-r--r--net/ipv6/ip6_input.c63
-rw-r--r--net/ipv6/ip6_offload.c48
-rw-r--r--net/ipv6/ip6_output.c46
-rw-r--r--net/ipv6/ip6_udp_tunnel.c16
-rw-r--r--net/ipv6/ip6mr.c9
-rw-r--r--net/ipv6/ipv6_sockglue.c2
-rw-r--r--net/ipv6/netfilter/Makefile2
-rw-r--r--net/ipv6/netfilter/nf_nat_l3proto_ipv6.c43
-rw-r--r--net/ipv6/netfilter/nf_nat_proto_icmpv6.c90
-rw-r--r--net/ipv6/netfilter/nf_reject_ipv6.c10
-rw-r--r--net/ipv6/raw.c5
-rw-r--r--net/ipv6/route.c5
-rw-r--r--net/ipv6/tcp_ipv6.c16
-rw-r--r--net/ipv6/tcpv6_offload.c7
-rw-r--r--net/ipv6/tunnel6.c12
-rw-r--r--net/ipv6/udp.c320
-rw-r--r--net/ipv6/udp_impl.h4
-rw-r--r--net/ipv6/udp_offload.c13
-rw-r--r--net/ipv6/udplite.c5
-rw-r--r--net/ipv6/xfrm6_input.c8
-rw-r--r--net/ipv6/xfrm6_policy.c1
-rw-r--r--net/ipv6/xfrm6_protocol.c18
-rw-r--r--net/ipv6/xfrm6_tunnel.c3
-rw-r--r--net/iucv/af_iucv.c41
-rw-r--r--net/key/af_key.c2
-rw-r--r--net/l3mdev/l3mdev.c18
-rw-r--r--net/mac80211/Kconfig11
-rw-r--r--net/mac80211/cfg.c29
-rw-r--r--net/mac80211/debugfs_netdev.c3
-rw-r--r--net/mac80211/debugfs_sta.c14
-rw-r--r--net/mac80211/driver-ops.h34
-rw-r--r--net/mac80211/ieee80211_i.h1
-rw-r--r--net/mac80211/iface.c10
-rw-r--r--net/mac80211/main.c4
-rw-r--r--net/mac80211/mesh.c8
-rw-r--r--net/mac80211/mesh.h3
-rw-r--r--net/mac80211/mesh_plink.c35
-rw-r--r--net/mac80211/mlme.c53
-rw-r--r--net/mac80211/rx.c37
-rw-r--r--net/mac80211/scan.c22
-rw-r--r--net/mac80211/sta_info.c11
-rw-r--r--net/mac80211/sta_info.h2
-rw-r--r--net/mac80211/trace.h18
-rw-r--r--net/mac80211/tx.c11
-rw-r--r--net/mac80211/util.c51
-rw-r--r--net/ncsi/internal.h24
-rw-r--r--net/ncsi/ncsi-aen.c75
-rw-r--r--net/ncsi/ncsi-manage.c550
-rw-r--r--net/ncsi/ncsi-netlink.c233
-rw-r--r--net/ncsi/ncsi-pkt.h9
-rw-r--r--net/ncsi/ncsi-rsp.c43
-rw-r--r--net/netfilter/Kconfig15
-rw-r--r--net/netfilter/Makefile7
-rw-r--r--net/netfilter/ipset/ip_set_bitmap_ipmac.c13
-rw-r--r--net/netfilter/ipset/ip_set_core.c170
-rw-r--r--net/netfilter/ipset/ip_set_hash_gen.h4
-rw-r--r--net/netfilter/ipset/ip_set_hash_ipmac.c27
-rw-r--r--net/netfilter/ipset/ip_set_hash_mac.c10
-rw-r--r--net/netfilter/nf_conntrack_acct.c89
-rw-r--r--net/netfilter/nf_conntrack_core.c28
-rw-r--r--net/netfilter/nf_conntrack_ecache.c66
-rw-r--r--net/netfilter/nf_conntrack_helper.c69
-rw-r--r--net/netfilter/nf_conntrack_netlink.c30
-rw-r--r--net/netfilter/nf_conntrack_proto.c21
-rw-r--r--net/netfilter/nf_conntrack_proto_gre.c42
-rw-r--r--net/netfilter/nf_conntrack_proto_udp.c18
-rw-r--r--net/netfilter/nf_conntrack_standalone.c103
-rw-r--r--net/netfilter/nf_conntrack_timestamp.c70
-rw-r--r--net/netfilter/nf_flow_table_core.c42
-rw-r--r--net/netfilter/nf_log_common.c20
-rw-r--r--net/netfilter/nf_nat_core.c327
-rw-r--r--net/netfilter/nf_nat_proto.c343
-rw-r--r--net/netfilter/nf_nat_proto_common.c120
-rw-r--r--net/netfilter/nf_nat_proto_dccp.c82
-rw-r--r--net/netfilter/nf_nat_proto_sctp.c77
-rw-r--r--net/netfilter/nf_nat_proto_tcp.c85
-rw-r--r--net/netfilter/nf_nat_proto_udp.c130
-rw-r--r--net/netfilter/nf_nat_proto_unknown.c54
-rw-r--r--net/netfilter/nf_nat_sip.c39
-rw-r--r--net/netfilter/nf_queue.c50
-rw-r--r--net/netfilter/nf_tables_api.c90
-rw-r--r--net/netfilter/nfnetlink_log.c2
-rw-r--r--net/netfilter/nfnetlink_queue.c28
-rw-r--r--net/netfilter/nft_meta.c2
-rw-r--r--net/netfilter/nft_xfrm.c2
-rw-r--r--net/netfilter/xt_hashlimit.c4
-rw-r--r--net/netfilter/xt_physdev.c2
-rw-r--r--net/netfilter/xt_policy.c2
-rw-r--r--net/openvswitch/actions.c13
-rw-r--r--net/openvswitch/flow.c6
-rw-r--r--net/openvswitch/flow.h2
-rw-r--r--net/openvswitch/flow_netlink.c22
-rw-r--r--net/openvswitch/vport-geneve.c2
-rw-r--r--net/openvswitch/vport-gre.c2
-rw-r--r--net/openvswitch/vport-netdev.c1
-rw-r--r--net/openvswitch/vport-vxlan.c2
-rw-r--r--net/packet/af_packet.c4
-rw-r--r--net/rfkill/rfkill-gpio.c1
-rw-r--r--net/sched/act_api.c221
-rw-r--r--net/sched/act_tunnel_key.c25
-rw-r--r--net/sched/act_vlan.c2
-rw-r--r--net/sched/cls_api.c337
-rw-r--r--net/sched/cls_bpf.c4
-rw-r--r--net/sched/cls_flower.c170
-rw-r--r--net/sched/cls_matchall.c5
-rw-r--r--net/sched/cls_u32.c10
-rw-r--r--net/sched/sch_api.c99
-rw-r--r--net/sched/sch_etf.c79
-rw-r--r--net/sched/sch_fq.c28
-rw-r--r--net/sched/sch_gred.c375
-rw-r--r--net/sched/sch_mq.c18
-rw-r--r--net/sched/sch_netem.c89
-rw-r--r--net/sched/sch_prio.c47
-rw-r--r--net/sched/sch_red.c48
-rw-r--r--net/sctp/associola.c2
-rw-r--r--net/sctp/bind_addr.c28
-rw-r--r--net/sctp/chunk.c8
-rw-r--r--net/sctp/input.c134
-rw-r--r--net/sctp/ipv6.c7
-rw-r--r--net/sctp/primitive.c2
-rw-r--r--net/sctp/sm_sideeffect.c12
-rw-r--r--net/sctp/sm_statetable.c2
-rw-r--r--net/sctp/socket.c174
-rw-r--r--net/sctp/stream_interleave.c46
-rw-r--r--net/sctp/ulpqueue.c8
-rw-r--r--net/smc/af_smc.c51
-rw-r--r--net/smc/smc_clc.c33
-rw-r--r--net/smc/smc_clc.h3
-rw-r--r--net/smc/smc_core.c16
-rw-r--r--net/smc/smc_core.h6
-rw-r--r--net/smc/smc_llc.c57
-rw-r--r--net/smc/smc_llc.h2
-rw-r--r--net/sunrpc/socklib.c2
-rw-r--r--net/switchdev/switchdev.c213
-rw-r--r--net/tipc/Makefile4
-rw-r--r--net/tipc/bearer.c9
-rw-r--r--net/tipc/bearer.h2
-rw-r--r--net/tipc/link.c220
-rw-r--r--net/tipc/link.h2
-rw-r--r--net/tipc/msg.h1
-rw-r--r--net/tipc/netlink_compat.c7
-rw-r--r--net/tipc/node.c96
-rw-r--r--net/tipc/node.h1
-rw-r--r--net/tipc/socket.c227
-rw-r--r--net/tipc/socket.h4
-rw-r--r--net/tipc/sysctl.c8
-rw-r--r--net/tipc/trace.c206
-rw-r--r--net/tipc/trace.h431
-rw-r--r--net/tls/tls_main.c14
-rw-r--r--net/tls/tls_sw.c54
-rw-r--r--net/wireless/Makefile1
-rw-r--r--net/wireless/chan.c3
-rw-r--r--net/wireless/core.c48
-rw-r--r--net/wireless/core.h5
-rw-r--r--net/wireless/nl80211.c307
-rw-r--r--net/wireless/nl80211.h32
-rw-r--r--net/wireless/pmsr.c590
-rw-r--r--net/wireless/rdev-ops.h25
-rw-r--r--net/wireless/scan.c2
-rw-r--r--net/wireless/trace.h92
-rw-r--r--net/wireless/util.c15
-rw-r--r--net/xdp/xsk.c16
-rw-r--r--net/xfrm/Kconfig1
-rw-r--r--net/xfrm/xfrm_device.c4
-rw-r--r--net/xfrm/xfrm_input.c76
-rw-r--r--net/xfrm/xfrm_interface.c2
-rw-r--r--net/xfrm/xfrm_output.c7
-rw-r--r--net/xfrm/xfrm_policy.c1270
310 files changed, 11745 insertions, 5214 deletions
diff --git a/net/6lowpan/debugfs.c b/net/6lowpan/debugfs.c
index 24915e0bb9ea..6c152f9ea26e 100644
--- a/net/6lowpan/debugfs.c
+++ b/net/6lowpan/debugfs.c
@@ -232,18 +232,7 @@ static int lowpan_context_show(struct seq_file *file, void *offset)
return 0;
}
-
-static int lowpan_context_open(struct inode *inode, struct file *file)
-{
- return single_open(file, lowpan_context_show, inode->i_private);
-}
-
-static const struct file_operations lowpan_context_fops = {
- .open = lowpan_context_open,
- .read = seq_read,
- .llseek = seq_lseek,
- .release = single_release,
-};
+DEFINE_SHOW_ATTRIBUTE(lowpan_context);
static int lowpan_short_addr_get(void *data, u64 *val)
{
diff --git a/net/8021q/vlan.c b/net/8021q/vlan.c
index 5e9950453955..dc4411165e43 100644
--- a/net/8021q/vlan.c
+++ b/net/8021q/vlan.c
@@ -330,6 +330,7 @@ static void vlan_transfer_features(struct net_device *dev,
vlandev->priv_flags &= ~IFF_XMIT_DST_RELEASE;
vlandev->priv_flags |= (vlan->real_dev->priv_flags & IFF_XMIT_DST_RELEASE);
+ vlandev->hw_enc_features = vlan_tnl_features(vlan->real_dev);
netdev_update_features(vlandev);
}
@@ -357,6 +358,7 @@ static int __vlan_device_event(struct net_device *dev, unsigned long event)
static int vlan_device_event(struct notifier_block *unused, unsigned long event,
void *ptr)
{
+ struct netlink_ext_ack *extack = netdev_notifier_info_to_extack(ptr);
struct net_device *dev = netdev_notifier_info_to_dev(ptr);
struct vlan_group *grp;
struct vlan_info *vlan_info;
@@ -459,7 +461,8 @@ static int vlan_device_event(struct notifier_block *unused, unsigned long event,
vlan = vlan_dev_priv(vlandev);
if (!(vlan->flags & VLAN_FLAG_LOOSE_BINDING))
- dev_change_flags(vlandev, flgs | IFF_UP);
+ dev_change_flags(vlandev, flgs | IFF_UP,
+ extack);
netif_stacked_transfer_operstate(dev, vlandev);
}
break;
@@ -647,93 +650,6 @@ out:
return err;
}
-static struct sk_buff *vlan_gro_receive(struct list_head *head,
- struct sk_buff *skb)
-{
- const struct packet_offload *ptype;
- unsigned int hlen, off_vlan;
- struct sk_buff *pp = NULL;
- struct vlan_hdr *vhdr;
- struct sk_buff *p;
- __be16 type;
- int flush = 1;
-
- off_vlan = skb_gro_offset(skb);
- hlen = off_vlan + sizeof(*vhdr);
- vhdr = skb_gro_header_fast(skb, off_vlan);
- if (skb_gro_header_hard(skb, hlen)) {
- vhdr = skb_gro_header_slow(skb, hlen, off_vlan);
- if (unlikely(!vhdr))
- goto out;
- }
-
- type = vhdr->h_vlan_encapsulated_proto;
-
- rcu_read_lock();
- ptype = gro_find_receive_by_type(type);
- if (!ptype)
- goto out_unlock;
-
- flush = 0;
-
- list_for_each_entry(p, head, list) {
- struct vlan_hdr *vhdr2;
-
- if (!NAPI_GRO_CB(p)->same_flow)
- continue;
-
- vhdr2 = (struct vlan_hdr *)(p->data + off_vlan);
- if (compare_vlan_header(vhdr, vhdr2))
- NAPI_GRO_CB(p)->same_flow = 0;
- }
-
- skb_gro_pull(skb, sizeof(*vhdr));
- skb_gro_postpull_rcsum(skb, vhdr, sizeof(*vhdr));
- pp = call_gro_receive(ptype->callbacks.gro_receive, head, skb);
-
-out_unlock:
- rcu_read_unlock();
-out:
- skb_gro_flush_final(skb, pp, flush);
-
- return pp;
-}
-
-static int vlan_gro_complete(struct sk_buff *skb, int nhoff)
-{
- struct vlan_hdr *vhdr = (struct vlan_hdr *)(skb->data + nhoff);
- __be16 type = vhdr->h_vlan_encapsulated_proto;
- struct packet_offload *ptype;
- int err = -ENOENT;
-
- rcu_read_lock();
- ptype = gro_find_complete_by_type(type);
- if (ptype)
- err = ptype->callbacks.gro_complete(skb, nhoff + sizeof(*vhdr));
-
- rcu_read_unlock();
- return err;
-}
-
-static struct packet_offload vlan_packet_offloads[] __read_mostly = {
- {
- .type = cpu_to_be16(ETH_P_8021Q),
- .priority = 10,
- .callbacks = {
- .gro_receive = vlan_gro_receive,
- .gro_complete = vlan_gro_complete,
- },
- },
- {
- .type = cpu_to_be16(ETH_P_8021AD),
- .priority = 10,
- .callbacks = {
- .gro_receive = vlan_gro_receive,
- .gro_complete = vlan_gro_complete,
- },
- },
-};
-
static int __net_init vlan_init_net(struct net *net)
{
struct vlan_net *vn = net_generic(net, vlan_net_id);
@@ -761,7 +677,6 @@ static struct pernet_operations vlan_net_ops = {
static int __init vlan_proto_init(void)
{
int err;
- unsigned int i;
pr_info("%s v%s\n", vlan_fullname, vlan_version);
@@ -785,9 +700,6 @@ static int __init vlan_proto_init(void)
if (err < 0)
goto err5;
- for (i = 0; i < ARRAY_SIZE(vlan_packet_offloads); i++)
- dev_add_offload(&vlan_packet_offloads[i]);
-
vlan_ioctl_set(vlan_ioctl_handler);
return 0;
@@ -805,13 +717,8 @@ err0:
static void __exit vlan_cleanup_module(void)
{
- unsigned int i;
-
vlan_ioctl_set(NULL);
- for (i = 0; i < ARRAY_SIZE(vlan_packet_offloads); i++)
- dev_remove_offload(&vlan_packet_offloads[i]);
-
vlan_netlink_fini();
unregister_netdevice_notifier(&vlan_notifier_block);
diff --git a/net/8021q/vlan.h b/net/8021q/vlan.h
index 44df1c3df02d..c46daf09a501 100644
--- a/net/8021q/vlan.h
+++ b/net/8021q/vlan.h
@@ -92,6 +92,18 @@ static inline struct net_device *vlan_find_dev(struct net_device *real_dev,
return NULL;
}
+static inline netdev_features_t vlan_tnl_features(struct net_device *real_dev)
+{
+ netdev_features_t ret;
+
+ ret = real_dev->hw_enc_features &
+ (NETIF_F_CSUM_MASK | NETIF_F_ALL_TSO | NETIF_F_GSO_ENCAP_ALL);
+
+ if ((ret & NETIF_F_GSO_ENCAP_ALL) && (ret & NETIF_F_CSUM_MASK))
+ return (ret & ~NETIF_F_CSUM_MASK) | NETIF_F_HW_CSUM;
+ return 0;
+}
+
#define vlan_group_for_each_dev(grp, i, dev) \
for ((i) = 0; i < VLAN_PROTO_NUM * VLAN_N_VID; i++) \
if (((dev) = __vlan_group_get_device((grp), (i) / VLAN_N_VID, \
diff --git a/net/8021q/vlan_core.c b/net/8021q/vlan_core.c
index 4f60e86f4b8d..a313165e7a67 100644
--- a/net/8021q/vlan_core.c
+++ b/net/8021q/vlan_core.c
@@ -57,7 +57,7 @@ bool vlan_do_receive(struct sk_buff **skbp)
}
skb->priority = vlan_get_ingress_priority(vlan_dev, skb->vlan_tci);
- skb->vlan_tci = 0;
+ __vlan_hwaccel_clear_tag(skb);
rx_stats = this_cpu_ptr(vlan_dev_priv(vlan_dev)->vlan_pcpu_stats);
@@ -223,6 +223,33 @@ static int vlan_kill_rx_filter_info(struct net_device *dev, __be16 proto, u16 vi
return -ENODEV;
}
+int vlan_for_each(struct net_device *dev,
+ int (*action)(struct net_device *dev, int vid, void *arg),
+ void *arg)
+{
+ struct vlan_vid_info *vid_info;
+ struct vlan_info *vlan_info;
+ struct net_device *vdev;
+ int ret;
+
+ ASSERT_RTNL();
+
+ vlan_info = rtnl_dereference(dev->vlan_info);
+ if (!vlan_info)
+ return 0;
+
+ list_for_each_entry(vid_info, &vlan_info->vid_list, list) {
+ vdev = vlan_group_get_device(&vlan_info->grp, vid_info->proto,
+ vid_info->vid);
+ ret = action(vdev, vid_info->vid, arg);
+ if (ret)
+ return ret;
+ }
+
+ return 0;
+}
+EXPORT_SYMBOL(vlan_for_each);
+
int vlan_filter_push_vids(struct vlan_info *vlan_info, __be16 proto)
{
struct net_device *real_dev = vlan_info->real_dev;
@@ -426,3 +453,102 @@ bool vlan_uses_dev(const struct net_device *dev)
return vlan_info->grp.nr_vlan_devs ? true : false;
}
EXPORT_SYMBOL(vlan_uses_dev);
+
+static struct sk_buff *vlan_gro_receive(struct list_head *head,
+ struct sk_buff *skb)
+{
+ const struct packet_offload *ptype;
+ unsigned int hlen, off_vlan;
+ struct sk_buff *pp = NULL;
+ struct vlan_hdr *vhdr;
+ struct sk_buff *p;
+ __be16 type;
+ int flush = 1;
+
+ off_vlan = skb_gro_offset(skb);
+ hlen = off_vlan + sizeof(*vhdr);
+ vhdr = skb_gro_header_fast(skb, off_vlan);
+ if (skb_gro_header_hard(skb, hlen)) {
+ vhdr = skb_gro_header_slow(skb, hlen, off_vlan);
+ if (unlikely(!vhdr))
+ goto out;
+ }
+
+ type = vhdr->h_vlan_encapsulated_proto;
+
+ rcu_read_lock();
+ ptype = gro_find_receive_by_type(type);
+ if (!ptype)
+ goto out_unlock;
+
+ flush = 0;
+
+ list_for_each_entry(p, head, list) {
+ struct vlan_hdr *vhdr2;
+
+ if (!NAPI_GRO_CB(p)->same_flow)
+ continue;
+
+ vhdr2 = (struct vlan_hdr *)(p->data + off_vlan);
+ if (compare_vlan_header(vhdr, vhdr2))
+ NAPI_GRO_CB(p)->same_flow = 0;
+ }
+
+ skb_gro_pull(skb, sizeof(*vhdr));
+ skb_gro_postpull_rcsum(skb, vhdr, sizeof(*vhdr));
+ pp = call_gro_receive(ptype->callbacks.gro_receive, head, skb);
+
+out_unlock:
+ rcu_read_unlock();
+out:
+ skb_gro_flush_final(skb, pp, flush);
+
+ return pp;
+}
+
+static int vlan_gro_complete(struct sk_buff *skb, int nhoff)
+{
+ struct vlan_hdr *vhdr = (struct vlan_hdr *)(skb->data + nhoff);
+ __be16 type = vhdr->h_vlan_encapsulated_proto;
+ struct packet_offload *ptype;
+ int err = -ENOENT;
+
+ rcu_read_lock();
+ ptype = gro_find_complete_by_type(type);
+ if (ptype)
+ err = ptype->callbacks.gro_complete(skb, nhoff + sizeof(*vhdr));
+
+ rcu_read_unlock();
+ return err;
+}
+
+static struct packet_offload vlan_packet_offloads[] __read_mostly = {
+ {
+ .type = cpu_to_be16(ETH_P_8021Q),
+ .priority = 10,
+ .callbacks = {
+ .gro_receive = vlan_gro_receive,
+ .gro_complete = vlan_gro_complete,
+ },
+ },
+ {
+ .type = cpu_to_be16(ETH_P_8021AD),
+ .priority = 10,
+ .callbacks = {
+ .gro_receive = vlan_gro_receive,
+ .gro_complete = vlan_gro_complete,
+ },
+ },
+};
+
+static int __init vlan_offload_init(void)
+{
+ unsigned int i;
+
+ for (i = 0; i < ARRAY_SIZE(vlan_packet_offloads); i++)
+ dev_add_offload(&vlan_packet_offloads[i]);
+
+ return 0;
+}
+
+fs_initcall(vlan_offload_init);
diff --git a/net/8021q/vlan_dev.c b/net/8021q/vlan_dev.c
index ff720f1ebf73..b2d9c8f27cd7 100644
--- a/net/8021q/vlan_dev.c
+++ b/net/8021q/vlan_dev.c
@@ -562,6 +562,7 @@ static int vlan_dev_init(struct net_device *dev)
dev->hw_features = NETIF_F_HW_CSUM | NETIF_F_SG |
NETIF_F_FRAGLIST | NETIF_F_GSO_SOFTWARE |
+ NETIF_F_GSO_ENCAP_ALL |
NETIF_F_HIGHDMA | NETIF_F_SCTP_CRC |
NETIF_F_ALL_FCOE;
@@ -572,6 +573,7 @@ static int vlan_dev_init(struct net_device *dev)
netdev_warn(real_dev, "VLAN features are set incorrectly. Q-in-Q configurations may not work correctly.\n");
dev->vlan_features = real_dev->vlan_features & ~NETIF_F_ALL_FCOE;
+ dev->hw_enc_features = vlan_tnl_features(real_dev);
/* ipv6 shared card related stuff */
dev->dev_id = real_dev->dev_id;
diff --git a/net/Kconfig b/net/Kconfig
index f235edb593ba..5cb9de1aaf88 100644
--- a/net/Kconfig
+++ b/net/Kconfig
@@ -51,6 +51,9 @@ config NET_INGRESS
config NET_EGRESS
bool
+config SKB_EXTENSIONS
+ bool
+
menu "Networking options"
source "net/packet/Kconfig"
@@ -184,6 +187,7 @@ config BRIDGE_NETFILTER
depends on NETFILTER && INET
depends on NETFILTER_ADVANCED
select NETFILTER_FAMILY_BRIDGE
+ select SKB_EXTENSIONS
default m
---help---
Enabling this option will let arptables resp. iptables see bridged
diff --git a/net/batman-adv/Kconfig b/net/batman-adv/Kconfig
index f75816f58107..c386e6981416 100644
--- a/net/batman-adv/Kconfig
+++ b/net/batman-adv/Kconfig
@@ -22,7 +22,6 @@
config BATMAN_ADV
tristate "B.A.T.M.A.N. Advanced Meshing Protocol"
depends on NET
- select CRC16
select LIBCRC32C
help
B.A.T.M.A.N. (better approach to mobile ad-hoc networking) is
@@ -48,6 +47,7 @@ config BATMAN_ADV_BATMAN_V
config BATMAN_ADV_BLA
bool "Bridge Loop Avoidance"
depends on BATMAN_ADV && INET
+ select CRC16
default y
help
This option enables BLA (Bridge Loop Avoidance), a mechanism
@@ -82,6 +82,7 @@ config BATMAN_ADV_NC
config BATMAN_ADV_MCAST
bool "Multicast optimisation"
depends on BATMAN_ADV && INET && !(BRIDGE=m && BATMAN_ADV=y)
+ default y
help
This option enables the multicast optimisation which aims to
reduce the air overhead while improving the reliability of
@@ -100,12 +101,13 @@ config BATMAN_ADV_DEBUGFS
config BATMAN_ADV_DEBUG
bool "B.A.T.M.A.N. debugging"
- depends on BATMAN_ADV_DEBUGFS
+ depends on BATMAN_ADV
help
This is an option for use by developers; most people should
say N here. This enables compilation of support for
- outputting debugging information to the kernel log. The
- output is controlled via the module parameter debug.
+ outputting debugging information to the debugfs log or tracing
+ buffer. The output is controlled via the batadv netdev specific
+ log_level setting.
config BATMAN_ADV_TRACING
bool "B.A.T.M.A.N. tracing support"
diff --git a/net/batman-adv/bat_iv_ogm.c b/net/batman-adv/bat_iv_ogm.c
index d2227091029f..f97e566f0402 100644
--- a/net/batman-adv/bat_iv_ogm.c
+++ b/net/batman-adv/bat_iv_ogm.c
@@ -34,7 +34,6 @@
#include <linux/kernel.h>
#include <linux/kref.h>
#include <linux/list.h>
-#include <linux/lockdep.h>
#include <linux/netdevice.h>
#include <linux/netlink.h>
#include <linux/pkt_sched.h>
@@ -2585,13 +2584,14 @@ static void batadv_iv_gw_print(struct batadv_priv *bat_priv,
* batadv_iv_gw_dump_entry() - Dump a gateway into a message
* @msg: Netlink message to dump into
* @portid: Port making netlink request
- * @seq: Sequence number of netlink message
+ * @cb: Control block containing additional options
* @bat_priv: The bat priv with all the soft interface information
* @gw_node: Gateway to be dumped
*
* Return: Error code, or 0 on success
*/
-static int batadv_iv_gw_dump_entry(struct sk_buff *msg, u32 portid, u32 seq,
+static int batadv_iv_gw_dump_entry(struct sk_buff *msg, u32 portid,
+ struct netlink_callback *cb,
struct batadv_priv *bat_priv,
struct batadv_gw_node *gw_node)
{
@@ -2611,13 +2611,16 @@ static int batadv_iv_gw_dump_entry(struct sk_buff *msg, u32 portid, u32 seq,
curr_gw = batadv_gw_get_selected_gw_node(bat_priv);
- hdr = genlmsg_put(msg, portid, seq, &batadv_netlink_family,
- NLM_F_MULTI, BATADV_CMD_GET_GATEWAYS);
+ hdr = genlmsg_put(msg, portid, cb->nlh->nlmsg_seq,
+ &batadv_netlink_family, NLM_F_MULTI,
+ BATADV_CMD_GET_GATEWAYS);
if (!hdr) {
ret = -ENOBUFS;
goto out;
}
+ genl_dump_check_consistent(cb, hdr);
+
ret = -EMSGSIZE;
if (curr_gw == gw_node)
@@ -2668,13 +2671,15 @@ static void batadv_iv_gw_dump(struct sk_buff *msg, struct netlink_callback *cb,
int idx_skip = cb->args[0];
int idx = 0;
- rcu_read_lock();
- hlist_for_each_entry_rcu(gw_node, &bat_priv->gw.gateway_list, list) {
+ spin_lock_bh(&bat_priv->gw.list_lock);
+ cb->seq = bat_priv->gw.generation << 1 | 1;
+
+ hlist_for_each_entry(gw_node, &bat_priv->gw.gateway_list, list) {
if (idx++ < idx_skip)
continue;
- if (batadv_iv_gw_dump_entry(msg, portid, cb->nlh->nlmsg_seq,
- bat_priv, gw_node)) {
+ if (batadv_iv_gw_dump_entry(msg, portid, cb, bat_priv,
+ gw_node)) {
idx_skip = idx - 1;
goto unlock;
}
@@ -2682,7 +2687,7 @@ static void batadv_iv_gw_dump(struct sk_buff *msg, struct netlink_callback *cb,
idx_skip = idx;
unlock:
- rcu_read_unlock();
+ spin_unlock_bh(&bat_priv->gw.list_lock);
cb->args[0] = idx_skip;
}
diff --git a/net/batman-adv/bat_v.c b/net/batman-adv/bat_v.c
index 6baec4e68898..90e33f84d37a 100644
--- a/net/batman-adv/bat_v.c
+++ b/net/batman-adv/bat_v.c
@@ -27,11 +27,13 @@
#include <linux/jiffies.h>
#include <linux/kernel.h>
#include <linux/kref.h>
+#include <linux/list.h>
#include <linux/netdevice.h>
#include <linux/netlink.h>
#include <linux/rculist.h>
#include <linux/rcupdate.h>
#include <linux/seq_file.h>
+#include <linux/spinlock.h>
#include <linux/stddef.h>
#include <linux/types.h>
#include <linux/workqueue.h>
@@ -915,13 +917,14 @@ static void batadv_v_gw_print(struct batadv_priv *bat_priv,
* batadv_v_gw_dump_entry() - Dump a gateway into a message
* @msg: Netlink message to dump into
* @portid: Port making netlink request
- * @seq: Sequence number of netlink message
+ * @cb: Control block containing additional options
* @bat_priv: The bat priv with all the soft interface information
* @gw_node: Gateway to be dumped
*
* Return: Error code, or 0 on success
*/
-static int batadv_v_gw_dump_entry(struct sk_buff *msg, u32 portid, u32 seq,
+static int batadv_v_gw_dump_entry(struct sk_buff *msg, u32 portid,
+ struct netlink_callback *cb,
struct batadv_priv *bat_priv,
struct batadv_gw_node *gw_node)
{
@@ -941,13 +944,16 @@ static int batadv_v_gw_dump_entry(struct sk_buff *msg, u32 portid, u32 seq,
curr_gw = batadv_gw_get_selected_gw_node(bat_priv);
- hdr = genlmsg_put(msg, portid, seq, &batadv_netlink_family,
- NLM_F_MULTI, BATADV_CMD_GET_GATEWAYS);
+ hdr = genlmsg_put(msg, portid, cb->nlh->nlmsg_seq,
+ &batadv_netlink_family, NLM_F_MULTI,
+ BATADV_CMD_GET_GATEWAYS);
if (!hdr) {
ret = -ENOBUFS;
goto out;
}
+ genl_dump_check_consistent(cb, hdr);
+
ret = -EMSGSIZE;
if (curr_gw == gw_node) {
@@ -1018,13 +1024,15 @@ static void batadv_v_gw_dump(struct sk_buff *msg, struct netlink_callback *cb,
int idx_skip = cb->args[0];
int idx = 0;
- rcu_read_lock();
- hlist_for_each_entry_rcu(gw_node, &bat_priv->gw.gateway_list, list) {
+ spin_lock_bh(&bat_priv->gw.list_lock);
+ cb->seq = bat_priv->gw.generation << 1 | 1;
+
+ hlist_for_each_entry(gw_node, &bat_priv->gw.gateway_list, list) {
if (idx++ < idx_skip)
continue;
- if (batadv_v_gw_dump_entry(msg, portid, cb->nlh->nlmsg_seq,
- bat_priv, gw_node)) {
+ if (batadv_v_gw_dump_entry(msg, portid, cb, bat_priv,
+ gw_node)) {
idx_skip = idx - 1;
goto unlock;
}
@@ -1032,7 +1040,7 @@ static void batadv_v_gw_dump(struct sk_buff *msg, struct netlink_callback *cb,
idx_skip = idx;
unlock:
- rcu_read_unlock();
+ spin_unlock_bh(&bat_priv->gw.list_lock);
cb->args[0] = idx_skip;
}
diff --git a/net/batman-adv/bridge_loop_avoidance.c b/net/batman-adv/bridge_loop_avoidance.c
index 5f1aeeded0e3..5fdde2947802 100644
--- a/net/batman-adv/bridge_loop_avoidance.c
+++ b/net/batman-adv/bridge_loop_avoidance.c
@@ -2094,14 +2094,15 @@ out:
* to a netlink socket
* @msg: buffer for the message
* @portid: netlink port
- * @seq: Sequence number of netlink message
+ * @cb: Control block containing additional options
* @primary_if: primary interface
* @claim: entry to dump
*
* Return: 0 or error code.
*/
static int
-batadv_bla_claim_dump_entry(struct sk_buff *msg, u32 portid, u32 seq,
+batadv_bla_claim_dump_entry(struct sk_buff *msg, u32 portid,
+ struct netlink_callback *cb,
struct batadv_hard_iface *primary_if,
struct batadv_bla_claim *claim)
{
@@ -2111,13 +2112,16 @@ batadv_bla_claim_dump_entry(struct sk_buff *msg, u32 portid, u32 seq,
void *hdr;
int ret = -EINVAL;
- hdr = genlmsg_put(msg, portid, seq, &batadv_netlink_family,
- NLM_F_MULTI, BATADV_CMD_GET_BLA_CLAIM);
+ hdr = genlmsg_put(msg, portid, cb->nlh->nlmsg_seq,
+ &batadv_netlink_family, NLM_F_MULTI,
+ BATADV_CMD_GET_BLA_CLAIM);
if (!hdr) {
ret = -ENOBUFS;
goto out;
}
+ genl_dump_check_consistent(cb, hdr);
+
is_own = batadv_compare_eth(claim->backbone_gw->orig,
primary_addr);
@@ -2153,28 +2157,33 @@ out:
* to a netlink socket
* @msg: buffer for the message
* @portid: netlink port
- * @seq: Sequence number of netlink message
+ * @cb: Control block containing additional options
* @primary_if: primary interface
- * @head: bucket to dump
+ * @hash: hash to dump
+ * @bucket: bucket index to dump
* @idx_skip: How many entries to skip
*
* Return: always 0.
*/
static int
-batadv_bla_claim_dump_bucket(struct sk_buff *msg, u32 portid, u32 seq,
+batadv_bla_claim_dump_bucket(struct sk_buff *msg, u32 portid,
+ struct netlink_callback *cb,
struct batadv_hard_iface *primary_if,
- struct hlist_head *head, int *idx_skip)
+ struct batadv_hashtable *hash, unsigned int bucket,
+ int *idx_skip)
{
struct batadv_bla_claim *claim;
int idx = 0;
int ret = 0;
- rcu_read_lock();
- hlist_for_each_entry_rcu(claim, head, hash_entry) {
+ spin_lock_bh(&hash->list_locks[bucket]);
+ cb->seq = atomic_read(&hash->generation) << 1 | 1;
+
+ hlist_for_each_entry(claim, &hash->table[bucket], hash_entry) {
if (idx++ < *idx_skip)
continue;
- ret = batadv_bla_claim_dump_entry(msg, portid, seq,
+ ret = batadv_bla_claim_dump_entry(msg, portid, cb,
primary_if, claim);
if (ret) {
*idx_skip = idx - 1;
@@ -2184,7 +2193,7 @@ batadv_bla_claim_dump_bucket(struct sk_buff *msg, u32 portid, u32 seq,
*idx_skip = 0;
unlock:
- rcu_read_unlock();
+ spin_unlock_bh(&hash->list_locks[bucket]);
return ret;
}
@@ -2204,7 +2213,6 @@ int batadv_bla_claim_dump(struct sk_buff *msg, struct netlink_callback *cb)
struct batadv_hashtable *hash;
struct batadv_priv *bat_priv;
int bucket = cb->args[0];
- struct hlist_head *head;
int idx = cb->args[1];
int ifindex;
int ret = 0;
@@ -2230,11 +2238,8 @@ int batadv_bla_claim_dump(struct sk_buff *msg, struct netlink_callback *cb)
}
while (bucket < hash->size) {
- head = &hash->table[bucket];
-
- if (batadv_bla_claim_dump_bucket(msg, portid,
- cb->nlh->nlmsg_seq,
- primary_if, head, &idx))
+ if (batadv_bla_claim_dump_bucket(msg, portid, cb, primary_if,
+ hash, bucket, &idx))
break;
bucket++;
}
@@ -2325,14 +2330,15 @@ out:
* netlink socket
* @msg: buffer for the message
* @portid: netlink port
- * @seq: Sequence number of netlink message
+ * @cb: Control block containing additional options
* @primary_if: primary interface
* @backbone_gw: entry to dump
*
* Return: 0 or error code.
*/
static int
-batadv_bla_backbone_dump_entry(struct sk_buff *msg, u32 portid, u32 seq,
+batadv_bla_backbone_dump_entry(struct sk_buff *msg, u32 portid,
+ struct netlink_callback *cb,
struct batadv_hard_iface *primary_if,
struct batadv_bla_backbone_gw *backbone_gw)
{
@@ -2343,13 +2349,16 @@ batadv_bla_backbone_dump_entry(struct sk_buff *msg, u32 portid, u32 seq,
void *hdr;
int ret = -EINVAL;
- hdr = genlmsg_put(msg, portid, seq, &batadv_netlink_family,
- NLM_F_MULTI, BATADV_CMD_GET_BLA_BACKBONE);
+ hdr = genlmsg_put(msg, portid, cb->nlh->nlmsg_seq,
+ &batadv_netlink_family, NLM_F_MULTI,
+ BATADV_CMD_GET_BLA_BACKBONE);
if (!hdr) {
ret = -ENOBUFS;
goto out;
}
+ genl_dump_check_consistent(cb, hdr);
+
is_own = batadv_compare_eth(backbone_gw->orig, primary_addr);
spin_lock_bh(&backbone_gw->crc_lock);
@@ -2386,28 +2395,33 @@ out:
* a netlink socket
* @msg: buffer for the message
* @portid: netlink port
- * @seq: Sequence number of netlink message
+ * @cb: Control block containing additional options
* @primary_if: primary interface
- * @head: bucket to dump
+ * @hash: hash to dump
+ * @bucket: bucket index to dump
* @idx_skip: How many entries to skip
*
* Return: always 0.
*/
static int
-batadv_bla_backbone_dump_bucket(struct sk_buff *msg, u32 portid, u32 seq,
+batadv_bla_backbone_dump_bucket(struct sk_buff *msg, u32 portid,
+ struct netlink_callback *cb,
struct batadv_hard_iface *primary_if,
- struct hlist_head *head, int *idx_skip)
+ struct batadv_hashtable *hash,
+ unsigned int bucket, int *idx_skip)
{
struct batadv_bla_backbone_gw *backbone_gw;
int idx = 0;
int ret = 0;
- rcu_read_lock();
- hlist_for_each_entry_rcu(backbone_gw, head, hash_entry) {
+ spin_lock_bh(&hash->list_locks[bucket]);
+ cb->seq = atomic_read(&hash->generation) << 1 | 1;
+
+ hlist_for_each_entry(backbone_gw, &hash->table[bucket], hash_entry) {
if (idx++ < *idx_skip)
continue;
- ret = batadv_bla_backbone_dump_entry(msg, portid, seq,
+ ret = batadv_bla_backbone_dump_entry(msg, portid, cb,
primary_if, backbone_gw);
if (ret) {
*idx_skip = idx - 1;
@@ -2417,7 +2431,7 @@ batadv_bla_backbone_dump_bucket(struct sk_buff *msg, u32 portid, u32 seq,
*idx_skip = 0;
unlock:
- rcu_read_unlock();
+ spin_unlock_bh(&hash->list_locks[bucket]);
return ret;
}
@@ -2437,7 +2451,6 @@ int batadv_bla_backbone_dump(struct sk_buff *msg, struct netlink_callback *cb)
struct batadv_hashtable *hash;
struct batadv_priv *bat_priv;
int bucket = cb->args[0];
- struct hlist_head *head;
int idx = cb->args[1];
int ifindex;
int ret = 0;
@@ -2463,11 +2476,8 @@ int batadv_bla_backbone_dump(struct sk_buff *msg, struct netlink_callback *cb)
}
while (bucket < hash->size) {
- head = &hash->table[bucket];
-
- if (batadv_bla_backbone_dump_bucket(msg, portid,
- cb->nlh->nlmsg_seq,
- primary_if, head, &idx))
+ if (batadv_bla_backbone_dump_bucket(msg, portid, cb, primary_if,
+ hash, bucket, &idx))
break;
bucket++;
}
diff --git a/net/batman-adv/debugfs.c b/net/batman-adv/debugfs.c
index 8b608a2e2653..d4a7702e48d8 100644
--- a/net/batman-adv/debugfs.c
+++ b/net/batman-adv/debugfs.c
@@ -19,6 +19,7 @@
#include "debugfs.h"
#include "main.h"
+#include <asm/current.h>
#include <linux/dcache.h>
#include <linux/debugfs.h>
#include <linux/err.h>
@@ -27,6 +28,7 @@
#include <linux/fs.h>
#include <linux/netdevice.h>
#include <linux/printk.h>
+#include <linux/sched.h>
#include <linux/seq_file.h>
#include <linux/stat.h>
#include <linux/stddef.h>
diff --git a/net/batman-adv/distributed-arp-table.c b/net/batman-adv/distributed-arp-table.c
index a60bacf7120b..b9ffe1826527 100644
--- a/net/batman-adv/distributed-arp-table.c
+++ b/net/batman-adv/distributed-arp-table.c
@@ -863,23 +863,27 @@ out:
* netlink socket
* @msg: buffer for the message
* @portid: netlink port
- * @seq: Sequence number of netlink message
+ * @cb: Control block containing additional options
* @dat_entry: entry to dump
*
* Return: 0 or error code.
*/
static int
-batadv_dat_cache_dump_entry(struct sk_buff *msg, u32 portid, u32 seq,
+batadv_dat_cache_dump_entry(struct sk_buff *msg, u32 portid,
+ struct netlink_callback *cb,
struct batadv_dat_entry *dat_entry)
{
int msecs;
void *hdr;
- hdr = genlmsg_put(msg, portid, seq, &batadv_netlink_family,
- NLM_F_MULTI, BATADV_CMD_GET_DAT_CACHE);
+ hdr = genlmsg_put(msg, portid, cb->nlh->nlmsg_seq,
+ &batadv_netlink_family, NLM_F_MULTI,
+ BATADV_CMD_GET_DAT_CACHE);
if (!hdr)
return -ENOBUFS;
+ genl_dump_check_consistent(cb, hdr);
+
msecs = jiffies_to_msecs(jiffies - dat_entry->last_update);
if (nla_put_in_addr(msg, BATADV_ATTR_DAT_CACHE_IP4ADDRESS,
@@ -901,27 +905,31 @@ batadv_dat_cache_dump_entry(struct sk_buff *msg, u32 portid, u32 seq,
* a netlink socket
* @msg: buffer for the message
* @portid: netlink port
- * @seq: Sequence number of netlink message
- * @head: bucket to dump
+ * @cb: Control block containing additional options
+ * @hash: hash to dump
+ * @bucket: bucket index to dump
* @idx_skip: How many entries to skip
*
* Return: 0 or error code.
*/
static int
-batadv_dat_cache_dump_bucket(struct sk_buff *msg, u32 portid, u32 seq,
- struct hlist_head *head, int *idx_skip)
+batadv_dat_cache_dump_bucket(struct sk_buff *msg, u32 portid,
+ struct netlink_callback *cb,
+ struct batadv_hashtable *hash, unsigned int bucket,
+ int *idx_skip)
{
struct batadv_dat_entry *dat_entry;
int idx = 0;
- rcu_read_lock();
- hlist_for_each_entry_rcu(dat_entry, head, hash_entry) {
+ spin_lock_bh(&hash->list_locks[bucket]);
+ cb->seq = atomic_read(&hash->generation) << 1 | 1;
+
+ hlist_for_each_entry(dat_entry, &hash->table[bucket], hash_entry) {
if (idx < *idx_skip)
goto skip;
- if (batadv_dat_cache_dump_entry(msg, portid, seq,
- dat_entry)) {
- rcu_read_unlock();
+ if (batadv_dat_cache_dump_entry(msg, portid, cb, dat_entry)) {
+ spin_unlock_bh(&hash->list_locks[bucket]);
*idx_skip = idx;
return -EMSGSIZE;
@@ -930,7 +938,7 @@ batadv_dat_cache_dump_bucket(struct sk_buff *msg, u32 portid, u32 seq,
skip:
idx++;
}
- rcu_read_unlock();
+ spin_unlock_bh(&hash->list_locks[bucket]);
return 0;
}
@@ -951,7 +959,6 @@ int batadv_dat_cache_dump(struct sk_buff *msg, struct netlink_callback *cb)
struct batadv_hashtable *hash;
struct batadv_priv *bat_priv;
int bucket = cb->args[0];
- struct hlist_head *head;
int idx = cb->args[1];
int ifindex;
int ret = 0;
@@ -977,10 +984,7 @@ int batadv_dat_cache_dump(struct sk_buff *msg, struct netlink_callback *cb)
}
while (bucket < hash->size) {
- head = &hash->table[bucket];
-
- if (batadv_dat_cache_dump_bucket(msg, portid,
- cb->nlh->nlmsg_seq, head,
+ if (batadv_dat_cache_dump_bucket(msg, portid, cb, hash, bucket,
&idx))
break;
diff --git a/net/batman-adv/gateway_client.c b/net/batman-adv/gateway_client.c
index 140c61a3f1ec..9d8e5eda2314 100644
--- a/net/batman-adv/gateway_client.c
+++ b/net/batman-adv/gateway_client.c
@@ -377,6 +377,7 @@ static void batadv_gw_node_add(struct batadv_priv *bat_priv,
kref_get(&gw_node->refcount);
hlist_add_head_rcu(&gw_node->list, &bat_priv->gw.gateway_list);
+ bat_priv->gw.generation++;
batadv_dbg(BATADV_DBG_BATMAN, bat_priv,
"Found new gateway %pM -> gw bandwidth: %u.%u/%u.%u MBit\n",
@@ -472,6 +473,7 @@ void batadv_gw_node_update(struct batadv_priv *bat_priv,
if (!hlist_unhashed(&gw_node->list)) {
hlist_del_init_rcu(&gw_node->list);
batadv_gw_node_put(gw_node);
+ bat_priv->gw.generation++;
}
spin_unlock_bh(&bat_priv->gw.list_lock);
@@ -518,6 +520,7 @@ void batadv_gw_node_free(struct batadv_priv *bat_priv)
&bat_priv->gw.gateway_list, list) {
hlist_del_init_rcu(&gw_node->list);
batadv_gw_node_put(gw_node);
+ bat_priv->gw.generation++;
}
spin_unlock_bh(&bat_priv->gw.list_lock);
}
diff --git a/net/batman-adv/hard-interface.c b/net/batman-adv/hard-interface.c
index 781c5b6e6e8e..508f4416dfc9 100644
--- a/net/batman-adv/hard-interface.c
+++ b/net/batman-adv/hard-interface.c
@@ -951,6 +951,7 @@ batadv_hardif_add_interface(struct net_device *net_dev)
batadv_check_known_mac_addr(hard_iface->net_dev);
kref_get(&hard_iface->refcount);
list_add_tail_rcu(&hard_iface->list, &batadv_hardif_list);
+ batadv_hardif_generation++;
return hard_iface;
@@ -993,6 +994,7 @@ void batadv_hardif_remove_interfaces(void)
list_for_each_entry_safe(hard_iface, hard_iface_tmp,
&batadv_hardif_list, list) {
list_del_rcu(&hard_iface->list);
+ batadv_hardif_generation++;
batadv_hardif_remove_interface(hard_iface);
}
rtnl_unlock();
@@ -1054,6 +1056,7 @@ static int batadv_hard_if_event(struct notifier_block *this,
case NETDEV_UNREGISTER:
case NETDEV_PRE_TYPE_CHANGE:
list_del_rcu(&hard_iface->list);
+ batadv_hardif_generation++;
batadv_hardif_remove_interface(hard_iface);
break;
diff --git a/net/batman-adv/hash.c b/net/batman-adv/hash.c
index 7b49e4001778..9194f4d891b1 100644
--- a/net/batman-adv/hash.c
+++ b/net/batman-adv/hash.c
@@ -32,6 +32,8 @@ static void batadv_hash_init(struct batadv_hashtable *hash)
INIT_HLIST_HEAD(&hash->table[i]);
spin_lock_init(&hash->list_locks[i]);
}
+
+ atomic_set(&hash->generation, 0);
}
/**
diff --git a/net/batman-adv/hash.h b/net/batman-adv/hash.h
index 9490a7ca2ba6..0e36fa1c7c3e 100644
--- a/net/batman-adv/hash.h
+++ b/net/batman-adv/hash.h
@@ -21,6 +21,7 @@
#include "main.h"
+#include <linux/atomic.h>
#include <linux/compiler.h>
#include <linux/list.h>
#include <linux/rculist.h>
@@ -58,6 +59,9 @@ struct batadv_hashtable {
/** @size: size of hashtable */
u32 size;
+
+ /** @generation: current (generation) sequence number */
+ atomic_t generation;
};
/* allocates and clears the hash */
@@ -112,6 +116,7 @@ static inline int batadv_hash_add(struct batadv_hashtable *hash,
/* no duplicate found in list, add new element */
hlist_add_head_rcu(data_node, head);
+ atomic_inc(&hash->generation);
ret = 0;
@@ -154,6 +159,7 @@ static inline void *batadv_hash_remove(struct batadv_hashtable *hash,
data_save = node;
hlist_del_rcu(node);
+ atomic_inc(&hash->generation);
break;
}
spin_unlock_bh(&hash->list_locks[index]);
diff --git a/net/batman-adv/log.c b/net/batman-adv/log.c
index 6beb5f067810..02e55b78132f 100644
--- a/net/batman-adv/log.c
+++ b/net/batman-adv/log.c
@@ -43,6 +43,8 @@
#include "debugfs.h"
#include "trace.h"
+#ifdef CONFIG_BATMAN_ADV_DEBUGFS
+
#define BATADV_LOG_BUFF_MASK (batadv_log_buff_len - 1)
static const int batadv_log_buff_len = BATADV_LOG_BUF_LEN;
@@ -92,33 +94,6 @@ static int batadv_fdebug_log(struct batadv_priv_debug_log *debug_log,
return 0;
}
-/**
- * batadv_debug_log() - Add debug log entry
- * @bat_priv: the bat priv with all the soft interface information
- * @fmt: format string
- *
- * Return: 0 on success or negative error number in case of failure
- */
-int batadv_debug_log(struct batadv_priv *bat_priv, const char *fmt, ...)
-{
- struct va_format vaf;
- va_list args;
-
- va_start(args, fmt);
-
- vaf.fmt = fmt;
- vaf.va = &args;
-
- batadv_fdebug_log(bat_priv->debug_log, "[%10u] %pV",
- jiffies_to_msecs(jiffies), &vaf);
-
- trace_batadv_dbg(bat_priv, &vaf);
-
- va_end(args);
-
- return 0;
-}
-
static int batadv_log_open(struct inode *inode, struct file *file)
{
if (!try_module_get(THIS_MODULE))
@@ -259,3 +234,34 @@ void batadv_debug_log_cleanup(struct batadv_priv *bat_priv)
kfree(bat_priv->debug_log);
bat_priv->debug_log = NULL;
}
+
+#endif /* CONFIG_BATMAN_ADV_DEBUGFS */
+
+/**
+ * batadv_debug_log() - Add debug log entry
+ * @bat_priv: the bat priv with all the soft interface information
+ * @fmt: format string
+ *
+ * Return: 0 on success or negative error number in case of failure
+ */
+int batadv_debug_log(struct batadv_priv *bat_priv, const char *fmt, ...)
+{
+ struct va_format vaf;
+ va_list args;
+
+ va_start(args, fmt);
+
+ vaf.fmt = fmt;
+ vaf.va = &args;
+
+#ifdef CONFIG_BATMAN_ADV_DEBUGFS
+ batadv_fdebug_log(bat_priv->debug_log, "[%10u] %pV",
+ jiffies_to_msecs(jiffies), &vaf);
+#endif
+
+ trace_batadv_dbg(bat_priv, &vaf);
+
+ va_end(args);
+
+ return 0;
+}
diff --git a/net/batman-adv/main.c b/net/batman-adv/main.c
index 69c0d85bceb3..d1ed839fd32b 100644
--- a/net/batman-adv/main.c
+++ b/net/batman-adv/main.c
@@ -74,6 +74,7 @@
* list traversals just rcu-locked
*/
struct list_head batadv_hardif_list;
+unsigned int batadv_hardif_generation;
static int (*batadv_rx_handler[256])(struct sk_buff *skb,
struct batadv_hard_iface *recv_if);
@@ -186,6 +187,8 @@ int batadv_mesh_init(struct net_device *soft_iface)
INIT_HLIST_HEAD(&bat_priv->softif_vlan_list);
INIT_HLIST_HEAD(&bat_priv->tp_list);
+ bat_priv->gw.generation = 0;
+
ret = batadv_v_mesh_init(bat_priv);
if (ret < 0)
goto err;
diff --git a/net/batman-adv/main.h b/net/batman-adv/main.h
index 2002b70e18db..b572066325e4 100644
--- a/net/batman-adv/main.h
+++ b/net/batman-adv/main.h
@@ -25,7 +25,7 @@
#define BATADV_DRIVER_DEVICE "batman-adv"
#ifndef BATADV_SOURCE_VERSION
-#define BATADV_SOURCE_VERSION "2018.4"
+#define BATADV_SOURCE_VERSION "2019.0"
#endif
/* B.A.T.M.A.N. parameters */
@@ -247,6 +247,7 @@ static inline int batadv_print_vid(unsigned short vid)
}
extern struct list_head batadv_hardif_list;
+extern unsigned int batadv_hardif_generation;
extern unsigned char batadv_broadcast_addr[];
extern struct workqueue_struct *batadv_event_workqueue;
diff --git a/net/batman-adv/multicast.c b/net/batman-adv/multicast.c
index 86725d792e15..69244e4598f5 100644
--- a/net/batman-adv/multicast.c
+++ b/net/batman-adv/multicast.c
@@ -1365,22 +1365,26 @@ int batadv_mcast_mesh_info_put(struct sk_buff *msg,
* to a netlink socket
* @msg: buffer for the message
* @portid: netlink port
- * @seq: Sequence number of netlink message
+ * @cb: Control block containing additional options
* @orig_node: originator to dump the multicast flags of
*
* Return: 0 or error code.
*/
static int
-batadv_mcast_flags_dump_entry(struct sk_buff *msg, u32 portid, u32 seq,
+batadv_mcast_flags_dump_entry(struct sk_buff *msg, u32 portid,
+ struct netlink_callback *cb,
struct batadv_orig_node *orig_node)
{
void *hdr;
- hdr = genlmsg_put(msg, portid, seq, &batadv_netlink_family,
- NLM_F_MULTI, BATADV_CMD_GET_MCAST_FLAGS);
+ hdr = genlmsg_put(msg, portid, cb->nlh->nlmsg_seq,
+ &batadv_netlink_family, NLM_F_MULTI,
+ BATADV_CMD_GET_MCAST_FLAGS);
if (!hdr)
return -ENOBUFS;
+ genl_dump_check_consistent(cb, hdr);
+
if (nla_put(msg, BATADV_ATTR_ORIG_ADDRESS, ETH_ALEN,
orig_node->orig)) {
genlmsg_cancel(msg, hdr);
@@ -1405,21 +1409,26 @@ batadv_mcast_flags_dump_entry(struct sk_buff *msg, u32 portid, u32 seq,
* table to a netlink socket
* @msg: buffer for the message
* @portid: netlink port
- * @seq: Sequence number of netlink message
- * @head: bucket to dump
+ * @cb: Control block containing additional options
+ * @hash: hash to dump
+ * @bucket: bucket index to dump
* @idx_skip: How many entries to skip
*
* Return: 0 or error code.
*/
static int
-batadv_mcast_flags_dump_bucket(struct sk_buff *msg, u32 portid, u32 seq,
- struct hlist_head *head, long *idx_skip)
+batadv_mcast_flags_dump_bucket(struct sk_buff *msg, u32 portid,
+ struct netlink_callback *cb,
+ struct batadv_hashtable *hash,
+ unsigned int bucket, long *idx_skip)
{
struct batadv_orig_node *orig_node;
long idx = 0;
- rcu_read_lock();
- hlist_for_each_entry_rcu(orig_node, head, hash_entry) {
+ spin_lock_bh(&hash->list_locks[bucket]);
+ cb->seq = atomic_read(&hash->generation) << 1 | 1;
+
+ hlist_for_each_entry(orig_node, &hash->table[bucket], hash_entry) {
if (!test_bit(BATADV_ORIG_CAPA_HAS_MCAST,
&orig_node->capa_initialized))
continue;
@@ -1427,9 +1436,8 @@ batadv_mcast_flags_dump_bucket(struct sk_buff *msg, u32 portid, u32 seq,
if (idx < *idx_skip)
goto skip;
- if (batadv_mcast_flags_dump_entry(msg, portid, seq,
- orig_node)) {
- rcu_read_unlock();
+ if (batadv_mcast_flags_dump_entry(msg, portid, cb, orig_node)) {
+ spin_unlock_bh(&hash->list_locks[bucket]);
*idx_skip = idx;
return -EMSGSIZE;
@@ -1438,7 +1446,7 @@ batadv_mcast_flags_dump_bucket(struct sk_buff *msg, u32 portid, u32 seq,
skip:
idx++;
}
- rcu_read_unlock();
+ spin_unlock_bh(&hash->list_locks[bucket]);
return 0;
}
@@ -1447,7 +1455,7 @@ skip:
* __batadv_mcast_flags_dump() - dump multicast flags table to a netlink socket
* @msg: buffer for the message
* @portid: netlink port
- * @seq: Sequence number of netlink message
+ * @cb: Control block containing additional options
* @bat_priv: the bat priv with all the soft interface information
* @bucket: current bucket to dump
* @idx: index in current bucket to the next entry to dump
@@ -1455,19 +1463,17 @@ skip:
* Return: 0 or error code.
*/
static int
-__batadv_mcast_flags_dump(struct sk_buff *msg, u32 portid, u32 seq,
+__batadv_mcast_flags_dump(struct sk_buff *msg, u32 portid,
+ struct netlink_callback *cb,
struct batadv_priv *bat_priv, long *bucket, long *idx)
{
struct batadv_hashtable *hash = bat_priv->orig_hash;
long bucket_tmp = *bucket;
- struct hlist_head *head;
long idx_tmp = *idx;
while (bucket_tmp < hash->size) {
- head = &hash->table[bucket_tmp];
-
- if (batadv_mcast_flags_dump_bucket(msg, portid, seq, head,
- &idx_tmp))
+ if (batadv_mcast_flags_dump_bucket(msg, portid, cb, hash,
+ *bucket, &idx_tmp))
break;
bucket_tmp++;
@@ -1550,8 +1556,7 @@ int batadv_mcast_flags_dump(struct sk_buff *msg, struct netlink_callback *cb)
return ret;
bat_priv = netdev_priv(primary_if->soft_iface);
- ret = __batadv_mcast_flags_dump(msg, portid, cb->nlh->nlmsg_seq,
- bat_priv, bucket, idx);
+ ret = __batadv_mcast_flags_dump(msg, portid, cb, bat_priv, bucket, idx);
batadv_hardif_put(primary_if);
return ret;
diff --git a/net/batman-adv/netlink.c b/net/batman-adv/netlink.c
index 0d9459b69bdb..2dc3304cee54 100644
--- a/net/batman-adv/netlink.c
+++ b/net/batman-adv/netlink.c
@@ -29,11 +29,11 @@
#include <linux/if_ether.h>
#include <linux/init.h>
#include <linux/kernel.h>
+#include <linux/list.h>
#include <linux/netdevice.h>
#include <linux/netlink.h>
#include <linux/printk.h>
-#include <linux/rculist.h>
-#include <linux/rcupdate.h>
+#include <linux/rtnetlink.h>
#include <linux/skbuff.h>
#include <linux/stddef.h>
#include <linux/types.h>
@@ -445,23 +445,27 @@ out:
* batadv_netlink_dump_hardif_entry() - Dump one hard interface into a message
* @msg: Netlink message to dump into
* @portid: Port making netlink request
- * @seq: Sequence number of netlink message
+ * @cb: Control block containing additional options
* @hard_iface: Hard interface to dump
*
* Return: error code, or 0 on success
*/
static int
-batadv_netlink_dump_hardif_entry(struct sk_buff *msg, u32 portid, u32 seq,
+batadv_netlink_dump_hardif_entry(struct sk_buff *msg, u32 portid,
+ struct netlink_callback *cb,
struct batadv_hard_iface *hard_iface)
{
struct net_device *net_dev = hard_iface->net_dev;
void *hdr;
- hdr = genlmsg_put(msg, portid, seq, &batadv_netlink_family, NLM_F_MULTI,
+ hdr = genlmsg_put(msg, portid, cb->nlh->nlmsg_seq,
+ &batadv_netlink_family, NLM_F_MULTI,
BATADV_CMD_GET_HARDIFS);
if (!hdr)
return -EMSGSIZE;
+ genl_dump_check_consistent(cb, hdr);
+
if (nla_put_u32(msg, BATADV_ATTR_HARD_IFINDEX,
net_dev->ifindex) ||
nla_put_string(msg, BATADV_ATTR_HARD_IFNAME,
@@ -498,7 +502,6 @@ batadv_netlink_dump_hardifs(struct sk_buff *msg, struct netlink_callback *cb)
struct batadv_hard_iface *hard_iface;
int ifindex;
int portid = NETLINK_CB(cb->skb).portid;
- int seq = cb->nlh->nlmsg_seq;
int skip = cb->args[0];
int i = 0;
@@ -516,23 +519,24 @@ batadv_netlink_dump_hardifs(struct sk_buff *msg, struct netlink_callback *cb)
return -ENODEV;
}
- rcu_read_lock();
+ rtnl_lock();
+ cb->seq = batadv_hardif_generation << 1 | 1;
- list_for_each_entry_rcu(hard_iface, &batadv_hardif_list, list) {
+ list_for_each_entry(hard_iface, &batadv_hardif_list, list) {
if (hard_iface->soft_iface != soft_iface)
continue;
if (i++ < skip)
continue;
- if (batadv_netlink_dump_hardif_entry(msg, portid, seq,
+ if (batadv_netlink_dump_hardif_entry(msg, portid, cb,
hard_iface)) {
i--;
break;
}
}
- rcu_read_unlock();
+ rtnl_unlock();
dev_put(soft_iface);
diff --git a/net/batman-adv/trace.c b/net/batman-adv/trace.c
index 3d57f9981f25..8e1024217cff 100644
--- a/net/batman-adv/trace.c
+++ b/net/batman-adv/trace.c
@@ -16,7 +16,5 @@
* along with this program; if not, see <http://www.gnu.org/licenses/>.
*/
-#include <linux/module.h>
-
#define CREATE_TRACE_POINTS
#include "trace.h"
diff --git a/net/batman-adv/trace.h b/net/batman-adv/trace.h
index 3acda26a30ca..104784be94d7 100644
--- a/net/batman-adv/trace.h
+++ b/net/batman-adv/trace.h
@@ -21,7 +21,13 @@
#include "main.h"
+#include <linux/bug.h>
+#include <linux/kernel.h>
+#include <linux/netdevice.h>
+#include <linux/percpu.h>
+#include <linux/printk.h>
#include <linux/tracepoint.h>
+#include <linux/types.h>
#undef TRACE_SYSTEM
#define TRACE_SYSTEM batadv
diff --git a/net/batman-adv/translation-table.c b/net/batman-adv/translation-table.c
index d21624c44665..8dcd4968cde7 100644
--- a/net/batman-adv/translation-table.c
+++ b/net/batman-adv/translation-table.c
@@ -1145,14 +1145,15 @@ out:
* batadv_tt_local_dump_entry() - Dump one TT local entry into a message
* @msg :Netlink message to dump into
* @portid: Port making netlink request
- * @seq: Sequence number of netlink message
+ * @cb: Control block containing additional options
* @bat_priv: The bat priv with all the soft interface information
* @common: tt local & tt global common data
*
* Return: Error code, or 0 on success
*/
static int
-batadv_tt_local_dump_entry(struct sk_buff *msg, u32 portid, u32 seq,
+batadv_tt_local_dump_entry(struct sk_buff *msg, u32 portid,
+ struct netlink_callback *cb,
struct batadv_priv *bat_priv,
struct batadv_tt_common_entry *common)
{
@@ -1173,12 +1174,14 @@ batadv_tt_local_dump_entry(struct sk_buff *msg, u32 portid, u32 seq,
batadv_softif_vlan_put(vlan);
- hdr = genlmsg_put(msg, portid, seq, &batadv_netlink_family,
- NLM_F_MULTI,
+ hdr = genlmsg_put(msg, portid, cb->nlh->nlmsg_seq,
+ &batadv_netlink_family, NLM_F_MULTI,
BATADV_CMD_GET_TRANSTABLE_LOCAL);
if (!hdr)
return -ENOBUFS;
+ genl_dump_check_consistent(cb, hdr);
+
if (nla_put(msg, BATADV_ATTR_TT_ADDRESS, ETH_ALEN, common->addr) ||
nla_put_u32(msg, BATADV_ATTR_TT_CRC32, crc) ||
nla_put_u16(msg, BATADV_ATTR_TT_VID, common->vid) ||
@@ -1201,34 +1204,39 @@ batadv_tt_local_dump_entry(struct sk_buff *msg, u32 portid, u32 seq,
* batadv_tt_local_dump_bucket() - Dump one TT local bucket into a message
* @msg: Netlink message to dump into
* @portid: Port making netlink request
- * @seq: Sequence number of netlink message
+ * @cb: Control block containing additional options
* @bat_priv: The bat priv with all the soft interface information
- * @head: Pointer to the list containing the local tt entries
+ * @hash: hash to dump
+ * @bucket: bucket index to dump
* @idx_s: Number of entries to skip
*
* Return: Error code, or 0 on success
*/
static int
-batadv_tt_local_dump_bucket(struct sk_buff *msg, u32 portid, u32 seq,
+batadv_tt_local_dump_bucket(struct sk_buff *msg, u32 portid,
+ struct netlink_callback *cb,
struct batadv_priv *bat_priv,
- struct hlist_head *head, int *idx_s)
+ struct batadv_hashtable *hash, unsigned int bucket,
+ int *idx_s)
{
struct batadv_tt_common_entry *common;
int idx = 0;
- rcu_read_lock();
- hlist_for_each_entry_rcu(common, head, hash_entry) {
+ spin_lock_bh(&hash->list_locks[bucket]);
+ cb->seq = atomic_read(&hash->generation) << 1 | 1;
+
+ hlist_for_each_entry(common, &hash->table[bucket], hash_entry) {
if (idx++ < *idx_s)
continue;
- if (batadv_tt_local_dump_entry(msg, portid, seq, bat_priv,
+ if (batadv_tt_local_dump_entry(msg, portid, cb, bat_priv,
common)) {
- rcu_read_unlock();
+ spin_unlock_bh(&hash->list_locks[bucket]);
*idx_s = idx - 1;
return -EMSGSIZE;
}
}
- rcu_read_unlock();
+ spin_unlock_bh(&hash->list_locks[bucket]);
*idx_s = 0;
return 0;
@@ -1248,7 +1256,6 @@ int batadv_tt_local_dump(struct sk_buff *msg, struct netlink_callback *cb)
struct batadv_priv *bat_priv;
struct batadv_hard_iface *primary_if = NULL;
struct batadv_hashtable *hash;
- struct hlist_head *head;
int ret;
int ifindex;
int bucket = cb->args[0];
@@ -1276,10 +1283,8 @@ int batadv_tt_local_dump(struct sk_buff *msg, struct netlink_callback *cb)
hash = bat_priv->tt.local_hash;
while (bucket < hash->size) {
- head = &hash->table[bucket];
-
- if (batadv_tt_local_dump_bucket(msg, portid, cb->nlh->nlmsg_seq,
- bat_priv, head, &idx))
+ if (batadv_tt_local_dump_bucket(msg, portid, cb, bat_priv,
+ hash, bucket, &idx))
break;
bucket++;
diff --git a/net/batman-adv/types.h b/net/batman-adv/types.h
index 45b5592de816..cbe17da36fcb 100644
--- a/net/batman-adv/types.h
+++ b/net/batman-adv/types.h
@@ -1096,12 +1096,15 @@ struct batadv_priv_gw {
/** @gateway_list: list of available gateway nodes */
struct hlist_head gateway_list;
- /** @list_lock: lock protecting gateway_list & curr_gw */
+ /** @list_lock: lock protecting gateway_list, curr_gw, generation */
spinlock_t list_lock;
/** @curr_gw: pointer to currently selected gateway node */
struct batadv_gw_node __rcu *curr_gw;
+ /** @generation: current (generation) sequence number */
+ unsigned int generation;
+
/**
* @mode: gateway operation: off, client or server (see batadv_gw_modes)
*/
diff --git a/net/bluetooth/6lowpan.c b/net/bluetooth/6lowpan.c
index 828e87fe8027..9d79c7de234a 100644
--- a/net/bluetooth/6lowpan.c
+++ b/net/bluetooth/6lowpan.c
@@ -607,7 +607,7 @@ static void ifup(struct net_device *netdev)
int err;
rtnl_lock();
- err = dev_open(netdev);
+ err = dev_open(netdev, NULL);
if (err < 0)
BT_INFO("iface %s cannot be opened (%d)", netdev->name, err);
rtnl_unlock();
diff --git a/net/bluetooth/hci_event.c b/net/bluetooth/hci_event.c
index ef9928d7b4fb..ac2826ce162b 100644
--- a/net/bluetooth/hci_event.c
+++ b/net/bluetooth/hci_event.c
@@ -5711,6 +5711,12 @@ static bool hci_get_cmd_complete(struct hci_dev *hdev, u16 opcode,
return true;
}
+ /* Check if request ended in Command Status - no way to retreive
+ * any extra parameters in this case.
+ */
+ if (hdr->evt == HCI_EV_CMD_STATUS)
+ return false;
+
if (hdr->evt != HCI_EV_CMD_COMPLETE) {
bt_dev_err(hdev, "last event is not cmd complete (0x%2.2x)",
hdr->evt);
diff --git a/net/bluetooth/hci_request.c b/net/bluetooth/hci_request.c
index e8c9ef1e1922..ca73d36cc149 100644
--- a/net/bluetooth/hci_request.c
+++ b/net/bluetooth/hci_request.c
@@ -1556,7 +1556,7 @@ int __hci_req_setup_ext_adv_instance(struct hci_request *req, u8 instance)
connectable = (flags & MGMT_ADV_FLAG_CONNECTABLE) ||
mgmt_get_connectable(hdev);
- if (!is_advertising_allowed(hdev, connectable))
+ if (!is_advertising_allowed(hdev, connectable))
return -EPERM;
/* Set require_privacy to true only when non-connectable
diff --git a/net/bluetooth/l2cap_core.c b/net/bluetooth/l2cap_core.c
index 2146e0f3b6f8..2a7fb517d460 100644
--- a/net/bluetooth/l2cap_core.c
+++ b/net/bluetooth/l2cap_core.c
@@ -7650,17 +7650,7 @@ static int l2cap_debugfs_show(struct seq_file *f, void *p)
return 0;
}
-static int l2cap_debugfs_open(struct inode *inode, struct file *file)
-{
- return single_open(file, l2cap_debugfs_show, inode->i_private);
-}
-
-static const struct file_operations l2cap_debugfs_fops = {
- .open = l2cap_debugfs_open,
- .read = seq_read,
- .llseek = seq_lseek,
- .release = single_release,
-};
+DEFINE_SHOW_ATTRIBUTE(l2cap_debugfs);
static struct dentry *l2cap_debugfs;
diff --git a/net/bluetooth/rfcomm/core.c b/net/bluetooth/rfcomm/core.c
index b98225d65e87..1a635df80643 100644
--- a/net/bluetooth/rfcomm/core.c
+++ b/net/bluetooth/rfcomm/core.c
@@ -2166,17 +2166,7 @@ static int rfcomm_dlc_debugfs_show(struct seq_file *f, void *x)
return 0;
}
-static int rfcomm_dlc_debugfs_open(struct inode *inode, struct file *file)
-{
- return single_open(file, rfcomm_dlc_debugfs_show, inode->i_private);
-}
-
-static const struct file_operations rfcomm_dlc_debugfs_fops = {
- .open = rfcomm_dlc_debugfs_open,
- .read = seq_read,
- .llseek = seq_lseek,
- .release = single_release,
-};
+DEFINE_SHOW_ATTRIBUTE(rfcomm_dlc_debugfs);
static struct dentry *rfcomm_dlc_debugfs;
diff --git a/net/bluetooth/rfcomm/sock.c b/net/bluetooth/rfcomm/sock.c
index d606e9212291..aa0db1d1bd9b 100644
--- a/net/bluetooth/rfcomm/sock.c
+++ b/net/bluetooth/rfcomm/sock.c
@@ -1020,17 +1020,7 @@ static int rfcomm_sock_debugfs_show(struct seq_file *f, void *p)
return 0;
}
-static int rfcomm_sock_debugfs_open(struct inode *inode, struct file *file)
-{
- return single_open(file, rfcomm_sock_debugfs_show, inode->i_private);
-}
-
-static const struct file_operations rfcomm_sock_debugfs_fops = {
- .open = rfcomm_sock_debugfs_open,
- .read = seq_read,
- .llseek = seq_lseek,
- .release = single_release,
-};
+DEFINE_SHOW_ATTRIBUTE(rfcomm_sock_debugfs);
static struct dentry *rfcomm_sock_debugfs;
diff --git a/net/bluetooth/sco.c b/net/bluetooth/sco.c
index 8f0f9279eac9..529b38996d8b 100644
--- a/net/bluetooth/sco.c
+++ b/net/bluetooth/sco.c
@@ -1173,17 +1173,7 @@ static int sco_debugfs_show(struct seq_file *f, void *p)
return 0;
}
-static int sco_debugfs_open(struct inode *inode, struct file *file)
-{
- return single_open(file, sco_debugfs_show, inode->i_private);
-}
-
-static const struct file_operations sco_debugfs_fops = {
- .open = sco_debugfs_open,
- .read = seq_read,
- .llseek = seq_lseek,
- .release = single_release,
-};
+DEFINE_SHOW_ATTRIBUTE(sco_debugfs);
static struct dentry *sco_debugfs;
diff --git a/net/bpf/test_run.c b/net/bpf/test_run.c
index 25001913d03b..fa2644d276ef 100644
--- a/net/bpf/test_run.c
+++ b/net/bpf/test_run.c
@@ -75,8 +75,18 @@ static int bpf_test_finish(const union bpf_attr *kattr,
{
void __user *data_out = u64_to_user_ptr(kattr->test.data_out);
int err = -EFAULT;
+ u32 copy_size = size;
+
+ /* Clamp copy if the user has provided a size hint, but copy the full
+ * buffer if not to retain old behaviour.
+ */
+ if (kattr->test.data_size_out &&
+ copy_size > kattr->test.data_size_out) {
+ copy_size = kattr->test.data_size_out;
+ err = -ENOSPC;
+ }
- if (data_out && copy_to_user(data_out, data, size))
+ if (data_out && copy_to_user(data_out, data, copy_size))
goto out;
if (copy_to_user(&uattr->test.data_size_out, &size, sizeof(size)))
goto out;
@@ -84,7 +94,8 @@ static int bpf_test_finish(const union bpf_attr *kattr,
goto out;
if (copy_to_user(&uattr->test.duration, &duration, sizeof(duration)))
goto out;
- err = 0;
+ if (err != -ENOSPC)
+ err = 0;
out:
return err;
}
diff --git a/net/bridge/br.c b/net/bridge/br.c
index 360ad66c21e9..a5174e5001d8 100644
--- a/net/bridge/br.c
+++ b/net/bridge/br.c
@@ -31,6 +31,8 @@
*/
static int br_device_event(struct notifier_block *unused, unsigned long event, void *ptr)
{
+ struct netlink_ext_ack *extack = netdev_notifier_info_to_extack(ptr);
+ struct netdev_notifier_pre_changeaddr_info *prechaddr_info;
struct net_device *dev = netdev_notifier_info_to_dev(ptr);
struct net_bridge_port *p;
struct net_bridge *br;
@@ -56,6 +58,17 @@ static int br_device_event(struct notifier_block *unused, unsigned long event, v
br_mtu_auto_adjust(br);
break;
+ case NETDEV_PRE_CHANGEADDR:
+ if (br->dev->addr_assign_type == NET_ADDR_SET)
+ break;
+ prechaddr_info = ptr;
+ err = dev_pre_changeaddr_notify(br->dev,
+ prechaddr_info->dev_addr,
+ extack);
+ if (err)
+ return notifier_from_errno(err);
+ break;
+
case NETDEV_CHANGEADDR:
spin_lock_bh(&br->lock);
br_fdb_changeaddr(p, dev->dev_addr);
@@ -175,6 +188,82 @@ static struct notifier_block br_switchdev_notifier = {
.notifier_call = br_switchdev_event,
};
+/* br_boolopt_toggle - change user-controlled boolean option
+ *
+ * @br: bridge device
+ * @opt: id of the option to change
+ * @on: new option value
+ * @extack: extack for error messages
+ *
+ * Changes the value of the respective boolean option to @on taking care of
+ * any internal option value mapping and configuration.
+ */
+int br_boolopt_toggle(struct net_bridge *br, enum br_boolopt_id opt, bool on,
+ struct netlink_ext_ack *extack)
+{
+ switch (opt) {
+ case BR_BOOLOPT_NO_LL_LEARN:
+ br_opt_toggle(br, BROPT_NO_LL_LEARN, on);
+ break;
+ default:
+ /* shouldn't be called with unsupported options */
+ WARN_ON(1);
+ break;
+ }
+
+ return 0;
+}
+
+int br_boolopt_get(const struct net_bridge *br, enum br_boolopt_id opt)
+{
+ switch (opt) {
+ case BR_BOOLOPT_NO_LL_LEARN:
+ return br_opt_get(br, BROPT_NO_LL_LEARN);
+ default:
+ /* shouldn't be called with unsupported options */
+ WARN_ON(1);
+ break;
+ }
+
+ return 0;
+}
+
+int br_boolopt_multi_toggle(struct net_bridge *br,
+ struct br_boolopt_multi *bm,
+ struct netlink_ext_ack *extack)
+{
+ unsigned long bitmap = bm->optmask;
+ int err = 0;
+ int opt_id;
+
+ for_each_set_bit(opt_id, &bitmap, BR_BOOLOPT_MAX) {
+ bool on = !!(bm->optval & BIT(opt_id));
+
+ err = br_boolopt_toggle(br, opt_id, on, extack);
+ if (err) {
+ br_debug(br, "boolopt multi-toggle error: option: %d current: %d new: %d error: %d\n",
+ opt_id, br_boolopt_get(br, opt_id), on, err);
+ break;
+ }
+ }
+
+ return err;
+}
+
+void br_boolopt_multi_get(const struct net_bridge *br,
+ struct br_boolopt_multi *bm)
+{
+ u32 optval = 0;
+ int opt_id;
+
+ for (opt_id = 0; opt_id < BR_BOOLOPT_MAX; opt_id++)
+ optval |= (br_boolopt_get(br, opt_id) << opt_id);
+
+ bm->optval = optval;
+ bm->optmask = GENMASK((BR_BOOLOPT_MAX - 1), 0);
+}
+
+/* private bridge options, controlled by the kernel */
void br_opt_toggle(struct net_bridge *br, enum net_bridge_opts opt, bool on)
{
bool cur = !!br_opt_get(br, opt);
diff --git a/net/bridge/br_device.c b/net/bridge/br_device.c
index c6abf927f0c9..013323b6dbe4 100644
--- a/net/bridge/br_device.c
+++ b/net/bridge/br_device.c
@@ -131,9 +131,17 @@ static int br_dev_init(struct net_device *dev)
return err;
}
+ err = br_mdb_hash_init(br);
+ if (err) {
+ free_percpu(br->stats);
+ br_fdb_hash_fini(br);
+ return err;
+ }
+
err = br_vlan_init(br);
if (err) {
free_percpu(br->stats);
+ br_mdb_hash_fini(br);
br_fdb_hash_fini(br);
return err;
}
@@ -142,6 +150,7 @@ static int br_dev_init(struct net_device *dev)
if (err) {
free_percpu(br->stats);
br_vlan_flush(br);
+ br_mdb_hash_fini(br);
br_fdb_hash_fini(br);
}
br_set_lockdep_class(dev);
@@ -156,6 +165,7 @@ static void br_dev_uninit(struct net_device *dev)
br_multicast_dev_del(br);
br_multicast_uninit_stats(br);
br_vlan_flush(br);
+ br_mdb_hash_fini(br);
br_fdb_hash_fini(br);
free_percpu(br->stats);
}
@@ -393,6 +403,7 @@ static const struct net_device_ops br_netdev_ops = {
.ndo_fdb_add = br_fdb_add,
.ndo_fdb_del = br_fdb_delete,
.ndo_fdb_dump = br_fdb_dump,
+ .ndo_fdb_get = br_fdb_get,
.ndo_bridge_getlink = br_getlink,
.ndo_bridge_setlink = br_setlink,
.ndo_bridge_dellink = br_dellink,
diff --git a/net/bridge/br_fdb.c b/net/bridge/br_fdb.c
index e56ba3912a90..fe3c758791ca 100644
--- a/net/bridge/br_fdb.c
+++ b/net/bridge/br_fdb.c
@@ -773,6 +773,32 @@ skip:
return err;
}
+int br_fdb_get(struct sk_buff *skb,
+ struct nlattr *tb[],
+ struct net_device *dev,
+ const unsigned char *addr,
+ u16 vid, u32 portid, u32 seq,
+ struct netlink_ext_ack *extack)
+{
+ struct net_bridge *br = netdev_priv(dev);
+ struct net_bridge_fdb_entry *f;
+ int err = 0;
+
+ rcu_read_lock();
+ f = br_fdb_find_rcu(br, addr, vid);
+ if (!f) {
+ NL_SET_ERR_MSG(extack, "Fdb entry not found");
+ err = -ENOENT;
+ goto errout;
+ }
+
+ err = fdb_fill_info(skb, br, f, portid, seq,
+ RTM_NEWNEIGH, 0);
+errout:
+ rcu_read_unlock();
+ return err;
+}
+
/* Update (create or replace) forwarding database entry */
static int fdb_add_entry(struct net_bridge *br, struct net_bridge_port *source,
const u8 *addr, u16 state, u16 flags, u16 vid,
@@ -1164,3 +1190,23 @@ void br_fdb_offloaded_set(struct net_bridge *br, struct net_bridge_port *p,
spin_unlock_bh(&br->hash_lock);
}
+
+void br_fdb_clear_offload(const struct net_device *dev, u16 vid)
+{
+ struct net_bridge_fdb_entry *f;
+ struct net_bridge_port *p;
+
+ ASSERT_RTNL();
+
+ p = br_port_get_rtnl(dev);
+ if (!p)
+ return;
+
+ spin_lock_bh(&p->br->hash_lock);
+ hlist_for_each_entry(f, &p->br->fdb_list, fdb_node) {
+ if (f->dst == p && f->key.vlan_id == vid)
+ f->offloaded = 0;
+ }
+ spin_unlock_bh(&p->br->hash_lock);
+}
+EXPORT_SYMBOL_GPL(br_fdb_clear_offload);
diff --git a/net/bridge/br_if.c b/net/bridge/br_if.c
index 9b46d2dc4c22..41f0a696a65f 100644
--- a/net/bridge/br_if.c
+++ b/net/bridge/br_if.c
@@ -650,7 +650,16 @@ int br_add_if(struct net_bridge *br, struct net_device *dev,
if (br_fdb_insert(br, p, dev->dev_addr, 0))
netdev_err(dev, "failed insert local address bridge forwarding table\n");
- err = nbp_vlan_init(p);
+ if (br->dev->addr_assign_type != NET_ADDR_SET) {
+ /* Ask for permission to use this MAC address now, even if we
+ * don't end up choosing it below.
+ */
+ err = dev_pre_changeaddr_notify(br->dev, dev->dev_addr, extack);
+ if (err)
+ goto err7;
+ }
+
+ err = nbp_vlan_init(p, extack);
if (err) {
netdev_err(dev, "failed to initialize vlan filtering on this port\n");
goto err7;
@@ -741,3 +750,15 @@ void br_port_flags_change(struct net_bridge_port *p, unsigned long mask)
if (mask & BR_NEIGH_SUPPRESS)
br_recalculate_neigh_suppress_enabled(br);
}
+
+bool br_port_flag_is_set(const struct net_device *dev, unsigned long flag)
+{
+ struct net_bridge_port *p;
+
+ p = br_port_get_rtnl_rcu(dev);
+ if (!p)
+ return false;
+
+ return p->flags & flag;
+}
+EXPORT_SYMBOL_GPL(br_port_flag_is_set);
diff --git a/net/bridge/br_input.c b/net/bridge/br_input.c
index 3ddca11f44c2..5ea7e56119c1 100644
--- a/net/bridge/br_input.c
+++ b/net/bridge/br_input.c
@@ -188,7 +188,9 @@ static void __br_handle_local_finish(struct sk_buff *skb)
u16 vid = 0;
/* check if vlan is allowed, to avoid spoofing */
- if (p->flags & BR_LEARNING && br_should_learn(p, skb, &vid))
+ if ((p->flags & BR_LEARNING) &&
+ !br_opt_get(p->br, BROPT_NO_LL_LEARN) &&
+ br_should_learn(p, skb, &vid))
br_fdb_update(p->br, p, eth_hdr(skb)->h_source, vid, false);
}
diff --git a/net/bridge/br_mdb.c b/net/bridge/br_mdb.c
index 596ec6e7df11..f69c8d91dc81 100644
--- a/net/bridge/br_mdb.c
+++ b/net/bridge/br_mdb.c
@@ -78,82 +78,72 @@ static void __mdb_entry_to_br_ip(struct br_mdb_entry *entry, struct br_ip *ip)
static int br_mdb_fill_info(struct sk_buff *skb, struct netlink_callback *cb,
struct net_device *dev)
{
+ int idx = 0, s_idx = cb->args[1], err = 0;
struct net_bridge *br = netdev_priv(dev);
- struct net_bridge_mdb_htable *mdb;
+ struct net_bridge_mdb_entry *mp;
struct nlattr *nest, *nest2;
- int i, err = 0;
- int idx = 0, s_idx = cb->args[1];
if (!br_opt_get(br, BROPT_MULTICAST_ENABLED))
return 0;
- mdb = rcu_dereference(br->mdb);
- if (!mdb)
- return 0;
-
nest = nla_nest_start(skb, MDBA_MDB);
if (nest == NULL)
return -EMSGSIZE;
- for (i = 0; i < mdb->max; i++) {
- struct net_bridge_mdb_entry *mp;
+ hlist_for_each_entry_rcu(mp, &br->mdb_list, mdb_node) {
struct net_bridge_port_group *p;
struct net_bridge_port_group __rcu **pp;
struct net_bridge_port *port;
- hlist_for_each_entry_rcu(mp, &mdb->mhash[i], hlist[mdb->ver]) {
- if (idx < s_idx)
- goto skip;
+ if (idx < s_idx)
+ goto skip;
- nest2 = nla_nest_start(skb, MDBA_MDB_ENTRY);
- if (nest2 == NULL) {
- err = -EMSGSIZE;
- goto out;
- }
+ nest2 = nla_nest_start(skb, MDBA_MDB_ENTRY);
+ if (!nest2) {
+ err = -EMSGSIZE;
+ break;
+ }
- for (pp = &mp->ports;
- (p = rcu_dereference(*pp)) != NULL;
- pp = &p->next) {
- struct nlattr *nest_ent;
- struct br_mdb_entry e;
-
- port = p->port;
- if (!port)
- continue;
-
- memset(&e, 0, sizeof(e));
- e.ifindex = port->dev->ifindex;
- e.vid = p->addr.vid;
- __mdb_entry_fill_flags(&e, p->flags);
- if (p->addr.proto == htons(ETH_P_IP))
- e.addr.u.ip4 = p->addr.u.ip4;
+ for (pp = &mp->ports; (p = rcu_dereference(*pp)) != NULL;
+ pp = &p->next) {
+ struct nlattr *nest_ent;
+ struct br_mdb_entry e;
+
+ port = p->port;
+ if (!port)
+ continue;
+
+ memset(&e, 0, sizeof(e));
+ e.ifindex = port->dev->ifindex;
+ e.vid = p->addr.vid;
+ __mdb_entry_fill_flags(&e, p->flags);
+ if (p->addr.proto == htons(ETH_P_IP))
+ e.addr.u.ip4 = p->addr.u.ip4;
#if IS_ENABLED(CONFIG_IPV6)
- if (p->addr.proto == htons(ETH_P_IPV6))
- e.addr.u.ip6 = p->addr.u.ip6;
+ if (p->addr.proto == htons(ETH_P_IPV6))
+ e.addr.u.ip6 = p->addr.u.ip6;
#endif
- e.addr.proto = p->addr.proto;
- nest_ent = nla_nest_start(skb,
- MDBA_MDB_ENTRY_INFO);
- if (!nest_ent) {
- nla_nest_cancel(skb, nest2);
- err = -EMSGSIZE;
- goto out;
- }
- if (nla_put_nohdr(skb, sizeof(e), &e) ||
- nla_put_u32(skb,
- MDBA_MDB_EATTR_TIMER,
- br_timer_value(&p->timer))) {
- nla_nest_cancel(skb, nest_ent);
- nla_nest_cancel(skb, nest2);
- err = -EMSGSIZE;
- goto out;
- }
- nla_nest_end(skb, nest_ent);
+ e.addr.proto = p->addr.proto;
+ nest_ent = nla_nest_start(skb, MDBA_MDB_ENTRY_INFO);
+ if (!nest_ent) {
+ nla_nest_cancel(skb, nest2);
+ err = -EMSGSIZE;
+ goto out;
}
- nla_nest_end(skb, nest2);
- skip:
- idx++;
+ if (nla_put_nohdr(skb, sizeof(e), &e) ||
+ nla_put_u32(skb,
+ MDBA_MDB_EATTR_TIMER,
+ br_timer_value(&p->timer))) {
+ nla_nest_cancel(skb, nest_ent);
+ nla_nest_cancel(skb, nest2);
+ err = -EMSGSIZE;
+ goto out;
+ }
+ nla_nest_end(skb, nest_ent);
}
+ nla_nest_end(skb, nest2);
+skip:
+ idx++;
}
out:
@@ -203,8 +193,7 @@ static int br_mdb_dump(struct sk_buff *skb, struct netlink_callback *cb)
rcu_read_lock();
- /* In theory this could be wrapped to 0... */
- cb->seq = net->dev_base_seq + br_mdb_rehash_seq;
+ cb->seq = net->dev_base_seq;
for_each_netdev_rcu(net, dev) {
if (dev->priv_flags & IFF_EBRIDGE) {
@@ -297,7 +286,6 @@ static void br_mdb_complete(struct net_device *dev, int err, void *priv)
struct br_mdb_complete_info *data = priv;
struct net_bridge_port_group __rcu **pp;
struct net_bridge_port_group *p;
- struct net_bridge_mdb_htable *mdb;
struct net_bridge_mdb_entry *mp;
struct net_bridge_port *port = data->port;
struct net_bridge *br = port->br;
@@ -306,8 +294,7 @@ static void br_mdb_complete(struct net_device *dev, int err, void *priv)
goto err;
spin_lock_bh(&br->multicast_lock);
- mdb = mlock_dereference(br->mdb, br);
- mp = br_mdb_ip_get(mdb, &data->ip);
+ mp = br_mdb_ip_get(br, &data->ip);
if (!mp)
goto out;
for (pp = &mp->ports; (p = mlock_dereference(*pp, br)) != NULL;
@@ -344,7 +331,7 @@ static void br_mdb_switchdev_host_port(struct net_device *dev,
mdb.obj.orig_dev = dev;
switch (type) {
case RTM_NEWMDB:
- switchdev_port_obj_add(lower_dev, &mdb.obj);
+ switchdev_port_obj_add(lower_dev, &mdb.obj, NULL);
break;
case RTM_DELMDB:
switchdev_port_obj_del(lower_dev, &mdb.obj);
@@ -394,7 +381,7 @@ static void __br_mdb_notify(struct net_device *dev, struct net_bridge_port *p,
__mdb_entry_to_br_ip(entry, &complete_info->ip);
mdb.obj.complete_priv = complete_info;
mdb.obj.complete = br_mdb_complete;
- if (switchdev_port_obj_add(port_dev, &mdb.obj))
+ if (switchdev_port_obj_add(port_dev, &mdb.obj, NULL))
kfree(complete_info);
}
} else if (p && port_dev && type == RTM_DELMDB) {
@@ -588,14 +575,12 @@ static int br_mdb_add_group(struct net_bridge *br, struct net_bridge_port *port,
struct net_bridge_mdb_entry *mp;
struct net_bridge_port_group *p;
struct net_bridge_port_group __rcu **pp;
- struct net_bridge_mdb_htable *mdb;
unsigned long now = jiffies;
int err;
- mdb = mlock_dereference(br->mdb, br);
- mp = br_mdb_ip_get(mdb, group);
+ mp = br_mdb_ip_get(br, group);
if (!mp) {
- mp = br_multicast_new_group(br, port, group);
+ mp = br_multicast_new_group(br, group);
err = PTR_ERR_OR_ZERO(mp);
if (err)
return err;
@@ -696,7 +681,6 @@ static int br_mdb_add(struct sk_buff *skb, struct nlmsghdr *nlh,
static int __br_mdb_del(struct net_bridge *br, struct br_mdb_entry *entry)
{
- struct net_bridge_mdb_htable *mdb;
struct net_bridge_mdb_entry *mp;
struct net_bridge_port_group *p;
struct net_bridge_port_group __rcu **pp;
@@ -709,9 +693,7 @@ static int __br_mdb_del(struct net_bridge *br, struct br_mdb_entry *entry)
__mdb_entry_to_br_ip(entry, &ip);
spin_lock_bh(&br->multicast_lock);
- mdb = mlock_dereference(br->mdb, br);
-
- mp = br_mdb_ip_get(mdb, &ip);
+ mp = br_mdb_ip_get(br, &ip);
if (!mp)
goto unlock;
@@ -728,7 +710,7 @@ static int __br_mdb_del(struct net_bridge *br, struct br_mdb_entry *entry)
rcu_assign_pointer(*pp, p->next);
hlist_del_init(&p->mglist);
del_timer(&p->timer);
- call_rcu(&p->rcu, br_multicast_free_pg);
+ kfree_rcu(p, rcu);
err = 0;
if (!mp->ports && !mp->host_joined &&
diff --git a/net/bridge/br_multicast.c b/net/bridge/br_multicast.c
index 0255223f2001..3aeff0895669 100644
--- a/net/bridge/br_multicast.c
+++ b/net/bridge/br_multicast.c
@@ -37,6 +37,14 @@
#include "br_private.h"
+static const struct rhashtable_params br_mdb_rht_params = {
+ .head_offset = offsetof(struct net_bridge_mdb_entry, rhnode),
+ .key_offset = offsetof(struct net_bridge_mdb_entry, addr),
+ .key_len = sizeof(struct br_ip),
+ .automatic_shrinking = true,
+ .locks_mul = 1,
+};
+
static void br_multicast_start_querier(struct net_bridge *br,
struct bridge_mcast_own_query *query);
static void br_multicast_add_router(struct net_bridge *br,
@@ -54,7 +62,6 @@ static void br_ip6_multicast_leave_group(struct net_bridge *br,
const struct in6_addr *group,
__u16 vid, const unsigned char *src);
#endif
-unsigned int br_mdb_rehash_seq;
static inline int br_ip_equal(const struct br_ip *a, const struct br_ip *b)
{
@@ -73,89 +80,58 @@ static inline int br_ip_equal(const struct br_ip *a, const struct br_ip *b)
return 0;
}
-static inline int __br_ip4_hash(struct net_bridge_mdb_htable *mdb, __be32 ip,
- __u16 vid)
-{
- return jhash_2words((__force u32)ip, vid, mdb->secret) & (mdb->max - 1);
-}
-
-#if IS_ENABLED(CONFIG_IPV6)
-static inline int __br_ip6_hash(struct net_bridge_mdb_htable *mdb,
- const struct in6_addr *ip,
- __u16 vid)
+static struct net_bridge_mdb_entry *br_mdb_ip_get_rcu(struct net_bridge *br,
+ struct br_ip *dst)
{
- return jhash_2words(ipv6_addr_hash(ip), vid,
- mdb->secret) & (mdb->max - 1);
+ return rhashtable_lookup(&br->mdb_hash_tbl, dst, br_mdb_rht_params);
}
-#endif
-static inline int br_ip_hash(struct net_bridge_mdb_htable *mdb,
- struct br_ip *ip)
-{
- switch (ip->proto) {
- case htons(ETH_P_IP):
- return __br_ip4_hash(mdb, ip->u.ip4, ip->vid);
-#if IS_ENABLED(CONFIG_IPV6)
- case htons(ETH_P_IPV6):
- return __br_ip6_hash(mdb, &ip->u.ip6, ip->vid);
-#endif
- }
- return 0;
-}
-
-static struct net_bridge_mdb_entry *__br_mdb_ip_get(
- struct net_bridge_mdb_htable *mdb, struct br_ip *dst, int hash)
+struct net_bridge_mdb_entry *br_mdb_ip_get(struct net_bridge *br,
+ struct br_ip *dst)
{
- struct net_bridge_mdb_entry *mp;
-
- hlist_for_each_entry_rcu(mp, &mdb->mhash[hash], hlist[mdb->ver]) {
- if (br_ip_equal(&mp->addr, dst))
- return mp;
- }
+ struct net_bridge_mdb_entry *ent;
- return NULL;
-}
+ lockdep_assert_held_once(&br->multicast_lock);
-struct net_bridge_mdb_entry *br_mdb_ip_get(struct net_bridge_mdb_htable *mdb,
- struct br_ip *dst)
-{
- if (!mdb)
- return NULL;
+ rcu_read_lock();
+ ent = rhashtable_lookup(&br->mdb_hash_tbl, dst, br_mdb_rht_params);
+ rcu_read_unlock();
- return __br_mdb_ip_get(mdb, dst, br_ip_hash(mdb, dst));
+ return ent;
}
-static struct net_bridge_mdb_entry *br_mdb_ip4_get(
- struct net_bridge_mdb_htable *mdb, __be32 dst, __u16 vid)
+static struct net_bridge_mdb_entry *br_mdb_ip4_get(struct net_bridge *br,
+ __be32 dst, __u16 vid)
{
struct br_ip br_dst;
+ memset(&br_dst, 0, sizeof(br_dst));
br_dst.u.ip4 = dst;
br_dst.proto = htons(ETH_P_IP);
br_dst.vid = vid;
- return br_mdb_ip_get(mdb, &br_dst);
+ return br_mdb_ip_get(br, &br_dst);
}
#if IS_ENABLED(CONFIG_IPV6)
-static struct net_bridge_mdb_entry *br_mdb_ip6_get(
- struct net_bridge_mdb_htable *mdb, const struct in6_addr *dst,
- __u16 vid)
+static struct net_bridge_mdb_entry *br_mdb_ip6_get(struct net_bridge *br,
+ const struct in6_addr *dst,
+ __u16 vid)
{
struct br_ip br_dst;
+ memset(&br_dst, 0, sizeof(br_dst));
br_dst.u.ip6 = *dst;
br_dst.proto = htons(ETH_P_IPV6);
br_dst.vid = vid;
- return br_mdb_ip_get(mdb, &br_dst);
+ return br_mdb_ip_get(br, &br_dst);
}
#endif
struct net_bridge_mdb_entry *br_mdb_get(struct net_bridge *br,
struct sk_buff *skb, u16 vid)
{
- struct net_bridge_mdb_htable *mdb = rcu_dereference(br->mdb);
struct br_ip ip;
if (!br_opt_get(br, BROPT_MULTICAST_ENABLED))
@@ -164,6 +140,7 @@ struct net_bridge_mdb_entry *br_mdb_get(struct net_bridge *br,
if (BR_INPUT_SKB_CB(skb)->igmp)
return NULL;
+ memset(&ip, 0, sizeof(ip));
ip.proto = skb->protocol;
ip.vid = vid;
@@ -180,70 +157,13 @@ struct net_bridge_mdb_entry *br_mdb_get(struct net_bridge *br,
return NULL;
}
- return br_mdb_ip_get(mdb, &ip);
-}
-
-static void br_mdb_free(struct rcu_head *head)
-{
- struct net_bridge_mdb_htable *mdb =
- container_of(head, struct net_bridge_mdb_htable, rcu);
- struct net_bridge_mdb_htable *old = mdb->old;
-
- mdb->old = NULL;
- kfree(old->mhash);
- kfree(old);
-}
-
-static int br_mdb_copy(struct net_bridge_mdb_htable *new,
- struct net_bridge_mdb_htable *old,
- int elasticity)
-{
- struct net_bridge_mdb_entry *mp;
- int maxlen;
- int len;
- int i;
-
- for (i = 0; i < old->max; i++)
- hlist_for_each_entry(mp, &old->mhash[i], hlist[old->ver])
- hlist_add_head(&mp->hlist[new->ver],
- &new->mhash[br_ip_hash(new, &mp->addr)]);
-
- if (!elasticity)
- return 0;
-
- maxlen = 0;
- for (i = 0; i < new->max; i++) {
- len = 0;
- hlist_for_each_entry(mp, &new->mhash[i], hlist[new->ver])
- len++;
- if (len > maxlen)
- maxlen = len;
- }
-
- return maxlen > elasticity ? -EINVAL : 0;
-}
-
-void br_multicast_free_pg(struct rcu_head *head)
-{
- struct net_bridge_port_group *p =
- container_of(head, struct net_bridge_port_group, rcu);
-
- kfree(p);
-}
-
-static void br_multicast_free_group(struct rcu_head *head)
-{
- struct net_bridge_mdb_entry *mp =
- container_of(head, struct net_bridge_mdb_entry, rcu);
-
- kfree(mp);
+ return br_mdb_ip_get_rcu(br, &ip);
}
static void br_multicast_group_expired(struct timer_list *t)
{
struct net_bridge_mdb_entry *mp = from_timer(mp, t, timer);
struct net_bridge *br = mp->br;
- struct net_bridge_mdb_htable *mdb;
spin_lock(&br->multicast_lock);
if (!netif_running(br->dev) || timer_pending(&mp->timer))
@@ -255,12 +175,11 @@ static void br_multicast_group_expired(struct timer_list *t)
if (mp->ports)
goto out;
- mdb = mlock_dereference(br->mdb, br);
-
- hlist_del_rcu(&mp->hlist[mdb->ver]);
- mdb->size--;
+ rhashtable_remove_fast(&br->mdb_hash_tbl, &mp->rhnode,
+ br_mdb_rht_params);
+ hlist_del_rcu(&mp->mdb_node);
- call_rcu(&mp->rcu, br_multicast_free_group);
+ kfree_rcu(mp, rcu);
out:
spin_unlock(&br->multicast_lock);
@@ -269,14 +188,11 @@ out:
static void br_multicast_del_pg(struct net_bridge *br,
struct net_bridge_port_group *pg)
{
- struct net_bridge_mdb_htable *mdb;
struct net_bridge_mdb_entry *mp;
struct net_bridge_port_group *p;
struct net_bridge_port_group __rcu **pp;
- mdb = mlock_dereference(br->mdb, br);
-
- mp = br_mdb_ip_get(mdb, &pg->addr);
+ mp = br_mdb_ip_get(br, &pg->addr);
if (WARN_ON(!mp))
return;
@@ -291,7 +207,7 @@ static void br_multicast_del_pg(struct net_bridge *br,
del_timer(&p->timer);
br_mdb_notify(br->dev, p->port, &pg->addr, RTM_DELMDB,
p->flags);
- call_rcu(&p->rcu, br_multicast_free_pg);
+ kfree_rcu(p, rcu);
if (!mp->ports && !mp->host_joined &&
netif_running(br->dev))
@@ -319,53 +235,6 @@ out:
spin_unlock(&br->multicast_lock);
}
-static int br_mdb_rehash(struct net_bridge_mdb_htable __rcu **mdbp, int max,
- int elasticity)
-{
- struct net_bridge_mdb_htable *old = rcu_dereference_protected(*mdbp, 1);
- struct net_bridge_mdb_htable *mdb;
- int err;
-
- mdb = kmalloc(sizeof(*mdb), GFP_ATOMIC);
- if (!mdb)
- return -ENOMEM;
-
- mdb->max = max;
- mdb->old = old;
-
- mdb->mhash = kcalloc(max, sizeof(*mdb->mhash), GFP_ATOMIC);
- if (!mdb->mhash) {
- kfree(mdb);
- return -ENOMEM;
- }
-
- mdb->size = old ? old->size : 0;
- mdb->ver = old ? old->ver ^ 1 : 0;
-
- if (!old || elasticity)
- get_random_bytes(&mdb->secret, sizeof(mdb->secret));
- else
- mdb->secret = old->secret;
-
- if (!old)
- goto out;
-
- err = br_mdb_copy(mdb, old, elasticity);
- if (err) {
- kfree(mdb->mhash);
- kfree(mdb);
- return err;
- }
-
- br_mdb_rehash_seq++;
- call_rcu(&mdb->rcu, br_mdb_free);
-
-out:
- rcu_assign_pointer(*mdbp, mdb);
-
- return 0;
-}
-
static struct sk_buff *br_ip4_multicast_alloc_query(struct net_bridge *br,
__be32 group,
u8 *igmp_type)
@@ -589,111 +458,19 @@ static struct sk_buff *br_multicast_alloc_query(struct net_bridge *br,
return NULL;
}
-static struct net_bridge_mdb_entry *br_multicast_get_group(
- struct net_bridge *br, struct net_bridge_port *port,
- struct br_ip *group, int hash)
-{
- struct net_bridge_mdb_htable *mdb;
- struct net_bridge_mdb_entry *mp;
- unsigned int count = 0;
- unsigned int max;
- int elasticity;
- int err;
-
- mdb = rcu_dereference_protected(br->mdb, 1);
- hlist_for_each_entry(mp, &mdb->mhash[hash], hlist[mdb->ver]) {
- count++;
- if (unlikely(br_ip_equal(group, &mp->addr)))
- return mp;
- }
-
- elasticity = 0;
- max = mdb->max;
-
- if (unlikely(count > br->hash_elasticity && count)) {
- if (net_ratelimit())
- br_info(br, "Multicast hash table "
- "chain limit reached: %s\n",
- port ? port->dev->name : br->dev->name);
-
- elasticity = br->hash_elasticity;
- }
-
- if (mdb->size >= max) {
- max *= 2;
- if (unlikely(max > br->hash_max)) {
- br_warn(br, "Multicast hash table maximum of %d "
- "reached, disabling snooping: %s\n",
- br->hash_max,
- port ? port->dev->name : br->dev->name);
- err = -E2BIG;
-disable:
- br_opt_toggle(br, BROPT_MULTICAST_ENABLED, false);
- goto err;
- }
- }
-
- if (max > mdb->max || elasticity) {
- if (mdb->old) {
- if (net_ratelimit())
- br_info(br, "Multicast hash table "
- "on fire: %s\n",
- port ? port->dev->name : br->dev->name);
- err = -EEXIST;
- goto err;
- }
-
- err = br_mdb_rehash(&br->mdb, max, elasticity);
- if (err) {
- br_warn(br, "Cannot rehash multicast "
- "hash table, disabling snooping: %s, %d, %d\n",
- port ? port->dev->name : br->dev->name,
- mdb->size, err);
- goto disable;
- }
-
- err = -EAGAIN;
- goto err;
- }
-
- return NULL;
-
-err:
- mp = ERR_PTR(err);
- return mp;
-}
-
struct net_bridge_mdb_entry *br_multicast_new_group(struct net_bridge *br,
- struct net_bridge_port *p,
struct br_ip *group)
{
- struct net_bridge_mdb_htable *mdb;
struct net_bridge_mdb_entry *mp;
- int hash;
int err;
- mdb = rcu_dereference_protected(br->mdb, 1);
- if (!mdb) {
- err = br_mdb_rehash(&br->mdb, BR_HASH_SIZE, 0);
- if (err)
- return ERR_PTR(err);
- goto rehash;
- }
-
- hash = br_ip_hash(mdb, group);
- mp = br_multicast_get_group(br, p, group, hash);
- switch (PTR_ERR(mp)) {
- case 0:
- break;
-
- case -EAGAIN:
-rehash:
- mdb = rcu_dereference_protected(br->mdb, 1);
- hash = br_ip_hash(mdb, group);
- break;
+ mp = br_mdb_ip_get(br, group);
+ if (mp)
+ return mp;
- default:
- goto out;
+ if (atomic_read(&br->mdb_hash_tbl.nelems) >= br->hash_max) {
+ br_opt_toggle(br, BROPT_MULTICAST_ENABLED, false);
+ return ERR_PTR(-E2BIG);
}
mp = kzalloc(sizeof(*mp), GFP_ATOMIC);
@@ -703,11 +480,15 @@ rehash:
mp->br = br;
mp->addr = *group;
timer_setup(&mp->timer, br_multicast_group_expired, 0);
+ err = rhashtable_lookup_insert_fast(&br->mdb_hash_tbl, &mp->rhnode,
+ br_mdb_rht_params);
+ if (err) {
+ kfree(mp);
+ mp = ERR_PTR(err);
+ } else {
+ hlist_add_head_rcu(&mp->mdb_node, &br->mdb_list);
+ }
- hlist_add_head_rcu(&mp->hlist[mdb->ver], &mdb->mhash[hash]);
- mdb->size++;
-
-out:
return mp;
}
@@ -768,7 +549,7 @@ static int br_multicast_add_group(struct net_bridge *br,
(port && port->state == BR_STATE_DISABLED))
goto out;
- mp = br_multicast_new_group(br, port, group);
+ mp = br_multicast_new_group(br, group);
err = PTR_ERR(mp);
if (IS_ERR(mp))
goto err;
@@ -837,6 +618,7 @@ static int br_ip6_multicast_add_group(struct net_bridge *br,
if (ipv6_addr_is_ll_all_nodes(group))
return 0;
+ memset(&br_group, 0, sizeof(br_group));
br_group.u.ip6 = *group;
br_group.proto = htons(ETH_P_IPV6);
br_group.vid = vid;
@@ -1483,7 +1265,7 @@ static void br_ip4_multicast_query(struct net_bridge *br,
goto out;
}
- mp = br_mdb_ip4_get(mlock_dereference(br->mdb, br), group, vid);
+ mp = br_mdb_ip4_get(br, group, vid);
if (!mp)
goto out;
@@ -1567,7 +1349,7 @@ static int br_ip6_multicast_query(struct net_bridge *br,
goto out;
}
- mp = br_mdb_ip6_get(mlock_dereference(br->mdb, br), group, vid);
+ mp = br_mdb_ip6_get(br, group, vid);
if (!mp)
goto out;
@@ -1601,7 +1383,6 @@ br_multicast_leave_group(struct net_bridge *br,
struct bridge_mcast_own_query *own_query,
const unsigned char *src)
{
- struct net_bridge_mdb_htable *mdb;
struct net_bridge_mdb_entry *mp;
struct net_bridge_port_group *p;
unsigned long now;
@@ -1612,8 +1393,7 @@ br_multicast_leave_group(struct net_bridge *br,
(port && port->state == BR_STATE_DISABLED))
goto out;
- mdb = mlock_dereference(br->mdb, br);
- mp = br_mdb_ip_get(mdb, group);
+ mp = br_mdb_ip_get(br, group);
if (!mp)
goto out;
@@ -1629,7 +1409,7 @@ br_multicast_leave_group(struct net_bridge *br,
rcu_assign_pointer(*pp, p->next);
hlist_del_init(&p->mglist);
del_timer(&p->timer);
- call_rcu(&p->rcu, br_multicast_free_pg);
+ kfree_rcu(p, rcu);
br_mdb_notify(br->dev, port, group, RTM_DELMDB,
p->flags);
@@ -1961,8 +1741,7 @@ static void br_ip6_multicast_query_expired(struct timer_list *t)
void br_multicast_init(struct net_bridge *br)
{
- br->hash_elasticity = 4;
- br->hash_max = 512;
+ br->hash_max = BR_MULTICAST_DEFAULT_HASH_MAX;
br->multicast_router = MDB_RTR_TYPE_TEMP_QUERY;
br->multicast_last_member_count = 2;
@@ -1999,6 +1778,7 @@ void br_multicast_init(struct net_bridge *br)
timer_setup(&br->ip6_own_query.timer,
br_ip6_multicast_query_expired, 0);
#endif
+ INIT_HLIST_HEAD(&br->mdb_list);
}
static void __br_multicast_open(struct net_bridge *br,
@@ -2033,40 +1813,20 @@ void br_multicast_stop(struct net_bridge *br)
void br_multicast_dev_del(struct net_bridge *br)
{
- struct net_bridge_mdb_htable *mdb;
struct net_bridge_mdb_entry *mp;
- struct hlist_node *n;
- u32 ver;
- int i;
+ struct hlist_node *tmp;
spin_lock_bh(&br->multicast_lock);
- mdb = mlock_dereference(br->mdb, br);
- if (!mdb)
- goto out;
-
- br->mdb = NULL;
-
- ver = mdb->ver;
- for (i = 0; i < mdb->max; i++) {
- hlist_for_each_entry_safe(mp, n, &mdb->mhash[i],
- hlist[ver]) {
- del_timer(&mp->timer);
- call_rcu(&mp->rcu, br_multicast_free_group);
- }
- }
-
- if (mdb->old) {
- spin_unlock_bh(&br->multicast_lock);
- rcu_barrier();
- spin_lock_bh(&br->multicast_lock);
- WARN_ON(mdb->old);
+ hlist_for_each_entry_safe(mp, tmp, &br->mdb_list, mdb_node) {
+ del_timer(&mp->timer);
+ rhashtable_remove_fast(&br->mdb_hash_tbl, &mp->rhnode,
+ br_mdb_rht_params);
+ hlist_del_rcu(&mp->mdb_node);
+ kfree_rcu(mp, rcu);
}
-
- mdb->old = mdb;
- call_rcu(&mdb->rcu, br_mdb_free);
-
-out:
spin_unlock_bh(&br->multicast_lock);
+
+ rcu_barrier();
}
int br_multicast_set_router(struct net_bridge *br, unsigned long val)
@@ -2176,9 +1936,7 @@ static void br_multicast_start_querier(struct net_bridge *br,
int br_multicast_toggle(struct net_bridge *br, unsigned long val)
{
- struct net_bridge_mdb_htable *mdb;
struct net_bridge_port *port;
- int err = 0;
spin_lock_bh(&br->multicast_lock);
if (!!br_opt_get(br, BROPT_MULTICAST_ENABLED) == !!val)
@@ -2192,21 +1950,6 @@ int br_multicast_toggle(struct net_bridge *br, unsigned long val)
if (!netif_running(br->dev))
goto unlock;
- mdb = mlock_dereference(br->mdb, br);
- if (mdb) {
- if (mdb->old) {
- err = -EEXIST;
-rollback:
- br_opt_toggle(br, BROPT_MULTICAST_ENABLED, false);
- goto unlock;
- }
-
- err = br_mdb_rehash(&br->mdb, mdb->max,
- br->hash_elasticity);
- if (err)
- goto rollback;
- }
-
br_multicast_open(br);
list_for_each_entry(port, &br->port_list, list)
__br_multicast_enable_port(port);
@@ -2214,7 +1957,7 @@ rollback:
unlock:
spin_unlock_bh(&br->multicast_lock);
- return err;
+ return 0;
}
bool br_multicast_enabled(const struct net_device *dev)
@@ -2271,45 +2014,6 @@ unlock:
return 0;
}
-int br_multicast_set_hash_max(struct net_bridge *br, unsigned long val)
-{
- int err = -EINVAL;
- u32 old;
- struct net_bridge_mdb_htable *mdb;
-
- spin_lock_bh(&br->multicast_lock);
- if (!is_power_of_2(val))
- goto unlock;
-
- mdb = mlock_dereference(br->mdb, br);
- if (mdb && val < mdb->size)
- goto unlock;
-
- err = 0;
-
- old = br->hash_max;
- br->hash_max = val;
-
- if (mdb) {
- if (mdb->old) {
- err = -EEXIST;
-rollback:
- br->hash_max = old;
- goto unlock;
- }
-
- err = br_mdb_rehash(&br->mdb, br->hash_max,
- br->hash_elasticity);
- if (err)
- goto rollback;
- }
-
-unlock:
- spin_unlock_bh(&br->multicast_lock);
-
- return err;
-}
-
int br_multicast_set_igmp_version(struct net_bridge *br, unsigned long val)
{
/* Currently we support only version 2 and 3 */
@@ -2646,3 +2350,13 @@ void br_multicast_get_stats(const struct net_bridge *br,
}
memcpy(dest, &tdst, sizeof(*dest));
}
+
+int br_mdb_hash_init(struct net_bridge *br)
+{
+ return rhashtable_init(&br->mdb_hash_tbl, &br_mdb_rht_params);
+}
+
+void br_mdb_hash_fini(struct net_bridge *br)
+{
+ rhashtable_destroy(&br->mdb_hash_tbl);
+}
diff --git a/net/bridge/br_netfilter_hooks.c b/net/bridge/br_netfilter_hooks.c
index b1b5e8516724..d21a23698410 100644
--- a/net/bridge/br_netfilter_hooks.c
+++ b/net/bridge/br_netfilter_hooks.c
@@ -132,10 +132,7 @@ static DEFINE_PER_CPU(struct brnf_frag_data, brnf_frag_data_storage);
static void nf_bridge_info_free(struct sk_buff *skb)
{
- if (skb->nf_bridge) {
- nf_bridge_put(skb->nf_bridge);
- skb->nf_bridge = NULL;
- }
+ skb_ext_del(skb, SKB_EXT_BRIDGE_NF);
}
static inline struct net_device *bridge_parent(const struct net_device *dev)
@@ -148,19 +145,7 @@ static inline struct net_device *bridge_parent(const struct net_device *dev)
static inline struct nf_bridge_info *nf_bridge_unshare(struct sk_buff *skb)
{
- struct nf_bridge_info *nf_bridge = skb->nf_bridge;
-
- if (refcount_read(&nf_bridge->use) > 1) {
- struct nf_bridge_info *tmp = nf_bridge_alloc(skb);
-
- if (tmp) {
- memcpy(tmp, nf_bridge, sizeof(struct nf_bridge_info));
- refcount_set(&tmp->use, 1);
- }
- nf_bridge_put(nf_bridge);
- nf_bridge = tmp;
- }
- return nf_bridge;
+ return skb_ext_add(skb, SKB_EXT_BRIDGE_NF);
}
unsigned int nf_bridge_encap_header_len(const struct sk_buff *skb)
@@ -247,7 +232,9 @@ drop:
void nf_bridge_update_protocol(struct sk_buff *skb)
{
- switch (skb->nf_bridge->orig_proto) {
+ const struct nf_bridge_info *nf_bridge = nf_bridge_info_get(skb);
+
+ switch (nf_bridge->orig_proto) {
case BRNF_PROTO_8021Q:
skb->protocol = htons(ETH_P_8021Q);
break;
@@ -506,7 +493,6 @@ static unsigned int br_nf_pre_routing(void *priv,
if (br_validate_ipv4(state->net, skb))
return NF_DROP;
- nf_bridge_put(skb->nf_bridge);
if (!nf_bridge_alloc(skb))
return NF_DROP;
if (!setup_pre_routing(skb))
@@ -569,7 +555,8 @@ static unsigned int br_nf_forward_ip(void *priv,
struct net_device *parent;
u_int8_t pf;
- if (!skb->nf_bridge)
+ nf_bridge = nf_bridge_info_get(skb);
+ if (!nf_bridge)
return NF_ACCEPT;
/* Need exclusive nf_bridge_info since we might have multiple
@@ -671,10 +658,8 @@ static int br_nf_push_frag_xmit(struct net *net, struct sock *sk, struct sk_buff
return 0;
}
- if (data->vlan_tci) {
- skb->vlan_tci = data->vlan_tci;
- skb->vlan_proto = data->vlan_proto;
- }
+ if (data->vlan_proto)
+ __vlan_hwaccel_put_tag(skb, data->vlan_proto, data->vlan_tci);
skb_copy_to_linear_data_offset(skb, -data->size, data->mac, data->size);
__skb_push(skb, data->encap_size);
@@ -703,7 +688,9 @@ br_nf_ip_fragment(struct net *net, struct sock *sk, struct sk_buff *skb,
static unsigned int nf_bridge_mtu_reduction(const struct sk_buff *skb)
{
- if (skb->nf_bridge->orig_proto == BRNF_PROTO_PPPOE)
+ const struct nf_bridge_info *nf_bridge = nf_bridge_info_get(skb);
+
+ if (nf_bridge->orig_proto == BRNF_PROTO_PPPOE)
return PPPOE_SES_HLEN;
return 0;
}
@@ -740,8 +727,13 @@ static int br_nf_dev_queue_xmit(struct net *net, struct sock *sk, struct sk_buff
data = this_cpu_ptr(&brnf_frag_data_storage);
- data->vlan_tci = skb->vlan_tci;
- data->vlan_proto = skb->vlan_proto;
+ if (skb_vlan_tag_present(skb)) {
+ data->vlan_tci = skb->vlan_tci;
+ data->vlan_proto = skb->vlan_proto;
+ } else {
+ data->vlan_proto = 0;
+ }
+
data->encap_size = nf_bridge_encap_header_len(skb);
data->size = ETH_HLEN + data->encap_size;
@@ -836,7 +828,9 @@ static unsigned int ip_sabotage_in(void *priv,
struct sk_buff *skb,
const struct nf_hook_state *state)
{
- if (skb->nf_bridge && !skb->nf_bridge->in_prerouting &&
+ struct nf_bridge_info *nf_bridge = nf_bridge_info_get(skb);
+
+ if (nf_bridge && !nf_bridge->in_prerouting &&
!netif_is_l3_master(skb->dev)) {
state->okfn(state->net, state->sk, skb);
return NF_STOLEN;
@@ -874,7 +868,9 @@ static void br_nf_pre_routing_finish_bridge_slow(struct sk_buff *skb)
static int br_nf_dev_xmit(struct sk_buff *skb)
{
- if (skb->nf_bridge && skb->nf_bridge->bridged_dnat) {
+ const struct nf_bridge_info *nf_bridge = nf_bridge_info_get(skb);
+
+ if (nf_bridge && nf_bridge->bridged_dnat) {
br_nf_pre_routing_finish_bridge_slow(skb);
return 1;
}
diff --git a/net/bridge/br_netfilter_ipv6.c b/net/bridge/br_netfilter_ipv6.c
index 96c072e71ea2..94039f588f1d 100644
--- a/net/bridge/br_netfilter_ipv6.c
+++ b/net/bridge/br_netfilter_ipv6.c
@@ -224,8 +224,8 @@ unsigned int br_nf_pre_routing_ipv6(void *priv,
if (br_validate_ipv6(state->net, skb))
return NF_DROP;
- nf_bridge_put(skb->nf_bridge);
- if (!nf_bridge_alloc(skb))
+ nf_bridge = nf_bridge_alloc(skb);
+ if (!nf_bridge)
return NF_DROP;
if (!setup_pre_routing(skb))
return NF_DROP;
diff --git a/net/bridge/br_netlink.c b/net/bridge/br_netlink.c
index 3345f1984542..9c07591b0232 100644
--- a/net/bridge/br_netlink.c
+++ b/net/bridge/br_netlink.c
@@ -525,7 +525,8 @@ int br_getlink(struct sk_buff *skb, u32 pid, u32 seq,
}
static int br_vlan_info(struct net_bridge *br, struct net_bridge_port *p,
- int cmd, struct bridge_vlan_info *vinfo, bool *changed)
+ int cmd, struct bridge_vlan_info *vinfo, bool *changed,
+ struct netlink_ext_ack *extack)
{
bool curr_change;
int err = 0;
@@ -537,11 +538,11 @@ static int br_vlan_info(struct net_bridge *br, struct net_bridge_port *p,
* per-VLAN entry as well
*/
err = nbp_vlan_add(p, vinfo->vid, vinfo->flags,
- &curr_change);
+ &curr_change, extack);
} else {
vinfo->flags |= BRIDGE_VLAN_INFO_BRENTRY;
err = br_vlan_add(br, vinfo->vid, vinfo->flags,
- &curr_change);
+ &curr_change, extack);
}
if (curr_change)
*changed = true;
@@ -568,7 +569,8 @@ static int br_process_vlan_info(struct net_bridge *br,
struct net_bridge_port *p, int cmd,
struct bridge_vlan_info *vinfo_curr,
struct bridge_vlan_info **vinfo_last,
- bool *changed)
+ bool *changed,
+ struct netlink_ext_ack *extack)
{
if (!vinfo_curr->vid || vinfo_curr->vid >= VLAN_VID_MASK)
return -EINVAL;
@@ -598,7 +600,8 @@ static int br_process_vlan_info(struct net_bridge *br,
sizeof(struct bridge_vlan_info));
for (v = (*vinfo_last)->vid; v <= vinfo_curr->vid; v++) {
tmp_vinfo.vid = v;
- err = br_vlan_info(br, p, cmd, &tmp_vinfo, changed);
+ err = br_vlan_info(br, p, cmd, &tmp_vinfo, changed,
+ extack);
if (err)
break;
}
@@ -607,13 +610,14 @@ static int br_process_vlan_info(struct net_bridge *br,
return err;
}
- return br_vlan_info(br, p, cmd, vinfo_curr, changed);
+ return br_vlan_info(br, p, cmd, vinfo_curr, changed, extack);
}
static int br_afspec(struct net_bridge *br,
struct net_bridge_port *p,
struct nlattr *af_spec,
- int cmd, bool *changed)
+ int cmd, bool *changed,
+ struct netlink_ext_ack *extack)
{
struct bridge_vlan_info *vinfo_curr = NULL;
struct bridge_vlan_info *vinfo_last = NULL;
@@ -643,7 +647,8 @@ static int br_afspec(struct net_bridge *br,
return -EINVAL;
vinfo_curr = nla_data(attr);
err = br_process_vlan_info(br, p, cmd, vinfo_curr,
- &vinfo_last, changed);
+ &vinfo_last, changed,
+ extack);
if (err)
return err;
break;
@@ -850,7 +855,8 @@ static int br_setport(struct net_bridge_port *p, struct nlattr *tb[])
}
/* Change state and parameters on port. */
-int br_setlink(struct net_device *dev, struct nlmsghdr *nlh, u16 flags)
+int br_setlink(struct net_device *dev, struct nlmsghdr *nlh, u16 flags,
+ struct netlink_ext_ack *extack)
{
struct net_bridge *br = (struct net_bridge *)netdev_priv(dev);
struct nlattr *tb[IFLA_BRPORT_MAX + 1];
@@ -897,7 +903,7 @@ int br_setlink(struct net_device *dev, struct nlmsghdr *nlh, u16 flags)
}
if (afspec)
- err = br_afspec(br, p, afspec, RTM_SETLINK, &changed);
+ err = br_afspec(br, p, afspec, RTM_SETLINK, &changed, extack);
if (changed)
br_ifinfo_notify(RTM_NEWLINK, br, p);
@@ -923,7 +929,7 @@ int br_dellink(struct net_device *dev, struct nlmsghdr *nlh, u16 flags)
if (!p && !(dev->priv_flags & IFF_EBRIDGE))
return -EINVAL;
- err = br_afspec(br, p, afspec, RTM_DELLINK, &changed);
+ err = br_afspec(br, p, afspec, RTM_DELLINK, &changed, NULL);
if (changed)
/* Send RTM_NEWLINK because userspace
* expects RTM_NEWLINK for vlan dels
@@ -1035,6 +1041,8 @@ static const struct nla_policy br_policy[IFLA_BR_MAX + 1] = {
[IFLA_BR_MCAST_IGMP_VERSION] = { .type = NLA_U8 },
[IFLA_BR_MCAST_MLD_VERSION] = { .type = NLA_U8 },
[IFLA_BR_VLAN_STATS_PER_PORT] = { .type = NLA_U8 },
+ [IFLA_BR_MULTI_BOOLOPT] = { .type = NLA_EXACT_LEN,
+ .len = sizeof(struct br_boolopt_multi) },
};
static int br_changelink(struct net_device *brdev, struct nlattr *tb[],
@@ -1103,7 +1111,7 @@ static int br_changelink(struct net_device *brdev, struct nlattr *tb[],
if (data[IFLA_BR_VLAN_DEFAULT_PVID]) {
__u16 defpvid = nla_get_u16(data[IFLA_BR_VLAN_DEFAULT_PVID]);
- err = __br_vlan_set_default_pvid(br, defpvid);
+ err = __br_vlan_set_default_pvid(br, defpvid, extack);
if (err)
return err;
}
@@ -1167,9 +1175,7 @@ static int br_changelink(struct net_device *brdev, struct nlattr *tb[],
if (data[IFLA_BR_MCAST_SNOOPING]) {
u8 mcast_snooping = nla_get_u8(data[IFLA_BR_MCAST_SNOOPING]);
- err = br_multicast_toggle(br, mcast_snooping);
- if (err)
- return err;
+ br_multicast_toggle(br, mcast_snooping);
}
if (data[IFLA_BR_MCAST_QUERY_USE_IFADDR]) {
@@ -1187,19 +1193,12 @@ static int br_changelink(struct net_device *brdev, struct nlattr *tb[],
return err;
}
- if (data[IFLA_BR_MCAST_HASH_ELASTICITY]) {
- u32 val = nla_get_u32(data[IFLA_BR_MCAST_HASH_ELASTICITY]);
-
- br->hash_elasticity = val;
- }
+ if (data[IFLA_BR_MCAST_HASH_ELASTICITY])
+ br_warn(br, "the hash_elasticity option has been deprecated and is always %u\n",
+ RHT_ELASTICITY);
- if (data[IFLA_BR_MCAST_HASH_MAX]) {
- u32 hash_max = nla_get_u32(data[IFLA_BR_MCAST_HASH_MAX]);
-
- err = br_multicast_set_hash_max(br, hash_max);
- if (err)
- return err;
- }
+ if (data[IFLA_BR_MCAST_HASH_MAX])
+ br->hash_max = nla_get_u32(data[IFLA_BR_MCAST_HASH_MAX]);
if (data[IFLA_BR_MCAST_LAST_MEMBER_CNT]) {
u32 val = nla_get_u32(data[IFLA_BR_MCAST_LAST_MEMBER_CNT]);
@@ -1296,6 +1295,15 @@ static int br_changelink(struct net_device *brdev, struct nlattr *tb[],
}
#endif
+ if (data[IFLA_BR_MULTI_BOOLOPT]) {
+ struct br_boolopt_multi *bm;
+
+ bm = nla_data(data[IFLA_BR_MULTI_BOOLOPT]);
+ err = br_boolopt_multi_toggle(br, bm, extack);
+ if (err)
+ return err;
+ }
+
return 0;
}
@@ -1374,6 +1382,7 @@ static size_t br_get_size(const struct net_device *brdev)
nla_total_size(sizeof(u8)) + /* IFLA_BR_NF_CALL_IP6TABLES */
nla_total_size(sizeof(u8)) + /* IFLA_BR_NF_CALL_ARPTABLES */
#endif
+ nla_total_size(sizeof(struct br_boolopt_multi)) + /* IFLA_BR_MULTI_BOOLOPT */
0;
}
@@ -1387,6 +1396,7 @@ static int br_fill_info(struct sk_buff *skb, const struct net_device *brdev)
u32 stp_enabled = br->stp_enabled;
u16 priority = (br->bridge_id.prio[0] << 8) | br->bridge_id.prio[1];
u8 vlan_enabled = br_vlan_enabled(br->dev);
+ struct br_boolopt_multi bm;
u64 clockval;
clockval = br_timer_value(&br->hello_timer);
@@ -1403,6 +1413,7 @@ static int br_fill_info(struct sk_buff *skb, const struct net_device *brdev)
if (nla_put_u64_64bit(skb, IFLA_BR_GC_TIMER, clockval, IFLA_BR_PAD))
return -EMSGSIZE;
+ br_boolopt_multi_get(br, &bm);
if (nla_put_u32(skb, IFLA_BR_FORWARD_DELAY, forward_delay) ||
nla_put_u32(skb, IFLA_BR_HELLO_TIME, hello_time) ||
nla_put_u32(skb, IFLA_BR_MAX_AGE, age_time) ||
@@ -1420,7 +1431,8 @@ static int br_fill_info(struct sk_buff *skb, const struct net_device *brdev)
nla_put_u8(skb, IFLA_BR_TOPOLOGY_CHANGE, br->topology_change) ||
nla_put_u8(skb, IFLA_BR_TOPOLOGY_CHANGE_DETECTED,
br->topology_change_detected) ||
- nla_put(skb, IFLA_BR_GROUP_ADDR, ETH_ALEN, br->group_addr))
+ nla_put(skb, IFLA_BR_GROUP_ADDR, ETH_ALEN, br->group_addr) ||
+ nla_put(skb, IFLA_BR_MULTI_BOOLOPT, sizeof(bm), &bm))
return -EMSGSIZE;
#ifdef CONFIG_BRIDGE_VLAN_FILTERING
@@ -1442,8 +1454,7 @@ static int br_fill_info(struct sk_buff *skb, const struct net_device *brdev)
br_opt_get(br, BROPT_MULTICAST_QUERIER)) ||
nla_put_u8(skb, IFLA_BR_MCAST_STATS_ENABLED,
br_opt_get(br, BROPT_MULTICAST_STATS_ENABLED)) ||
- nla_put_u32(skb, IFLA_BR_MCAST_HASH_ELASTICITY,
- br->hash_elasticity) ||
+ nla_put_u32(skb, IFLA_BR_MCAST_HASH_ELASTICITY, RHT_ELASTICITY) ||
nla_put_u32(skb, IFLA_BR_MCAST_HASH_MAX, br->hash_max) ||
nla_put_u32(skb, IFLA_BR_MCAST_LAST_MEMBER_CNT,
br->multicast_last_member_count) ||
diff --git a/net/bridge/br_private.h b/net/bridge/br_private.h
index 04c19a37e500..d240b3e7919f 100644
--- a/net/bridge/br_private.h
+++ b/net/bridge/br_private.h
@@ -31,6 +31,8 @@
#define BR_PORT_BITS 10
#define BR_MAX_PORTS (1<<BR_PORT_BITS)
+#define BR_MULTICAST_DEFAULT_HASH_MAX 4096
+
#define BR_VERSION "2.3"
/* Control of forwarding link local multicast */
@@ -213,23 +215,14 @@ struct net_bridge_port_group {
};
struct net_bridge_mdb_entry {
- struct hlist_node hlist[2];
+ struct rhash_head rhnode;
struct net_bridge *br;
struct net_bridge_port_group __rcu *ports;
struct rcu_head rcu;
struct timer_list timer;
struct br_ip addr;
bool host_joined;
-};
-
-struct net_bridge_mdb_htable {
- struct hlist_head *mhash;
- struct rcu_head rcu;
- struct net_bridge_mdb_htable *old;
- u32 size;
- u32 max;
- u32 secret;
- u32 ver;
+ struct hlist_node mdb_node;
};
struct net_bridge_port {
@@ -328,6 +321,7 @@ enum net_bridge_opts {
BROPT_NEIGH_SUPPRESS_ENABLED,
BROPT_MTU_SET_BY_USER,
BROPT_VLAN_STATS_PER_PORT,
+ BROPT_NO_LL_LEARN,
};
struct net_bridge {
@@ -380,7 +374,6 @@ struct net_bridge {
#ifdef CONFIG_BRIDGE_IGMP_SNOOPING
- u32 hash_elasticity;
u32 hash_max;
u32 multicast_last_member_count;
@@ -399,7 +392,9 @@ struct net_bridge {
unsigned long multicast_query_response_interval;
unsigned long multicast_startup_query_interval;
- struct net_bridge_mdb_htable __rcu *mdb;
+ struct rhashtable mdb_hash_tbl;
+
+ struct hlist_head mdb_list;
struct hlist_head router_list;
struct timer_list multicast_router_timer;
@@ -507,6 +502,14 @@ static inline int br_opt_get(const struct net_bridge *br,
return test_bit(opt, &br->options);
}
+int br_boolopt_toggle(struct net_bridge *br, enum br_boolopt_id opt, bool on,
+ struct netlink_ext_ack *extack);
+int br_boolopt_get(const struct net_bridge *br, enum br_boolopt_id opt);
+int br_boolopt_multi_toggle(struct net_bridge *br,
+ struct br_boolopt_multi *bm,
+ struct netlink_ext_ack *extack);
+void br_boolopt_multi_get(const struct net_bridge *br,
+ struct br_boolopt_multi *bm);
void br_opt_toggle(struct net_bridge *br, enum net_bridge_opts opt, bool on);
/* br_device.c */
@@ -572,6 +575,9 @@ int br_fdb_add(struct ndmsg *nlh, struct nlattr *tb[], struct net_device *dev,
const unsigned char *addr, u16 vid, u16 nlh_flags);
int br_fdb_dump(struct sk_buff *skb, struct netlink_callback *cb,
struct net_device *dev, struct net_device *fdev, int *idx);
+int br_fdb_get(struct sk_buff *skb, struct nlattr *tb[], struct net_device *dev,
+ const unsigned char *addr, u16 vid, u32 portid, u32 seq,
+ struct netlink_ext_ack *extack);
int br_fdb_sync_static(struct net_bridge *br, struct net_bridge_port *p);
void br_fdb_unsync_static(struct net_bridge *br, struct net_bridge_port *p);
int br_fdb_external_learn_add(struct net_bridge *br, struct net_bridge_port *p,
@@ -650,7 +656,6 @@ int br_ioctl_deviceless_stub(struct net *net, unsigned int cmd,
/* br_multicast.c */
#ifdef CONFIG_BRIDGE_IGMP_SNOOPING
-extern unsigned int br_mdb_rehash_seq;
int br_multicast_rcv(struct net_bridge *br, struct net_bridge_port *port,
struct sk_buff *skb, u16 vid);
struct net_bridge_mdb_entry *br_mdb_get(struct net_bridge *br,
@@ -675,17 +680,15 @@ int br_multicast_set_igmp_version(struct net_bridge *br, unsigned long val);
int br_multicast_set_mld_version(struct net_bridge *br, unsigned long val);
#endif
struct net_bridge_mdb_entry *
-br_mdb_ip_get(struct net_bridge_mdb_htable *mdb, struct br_ip *dst);
+br_mdb_ip_get(struct net_bridge *br, struct br_ip *dst);
struct net_bridge_mdb_entry *
-br_multicast_new_group(struct net_bridge *br, struct net_bridge_port *port,
- struct br_ip *group);
-void br_multicast_free_pg(struct rcu_head *head);
+br_multicast_new_group(struct net_bridge *br, struct br_ip *group);
struct net_bridge_port_group *
br_multicast_new_port_group(struct net_bridge_port *port, struct br_ip *group,
struct net_bridge_port_group __rcu *next,
unsigned char flags, const unsigned char *src);
-void br_mdb_init(void);
-void br_mdb_uninit(void);
+int br_mdb_hash_init(struct net_bridge *br);
+void br_mdb_hash_fini(struct net_bridge *br);
void br_mdb_notify(struct net_device *dev, struct net_bridge_port *port,
struct br_ip *group, int type, u8 flags);
void br_rtr_notify(struct net_device *dev, struct net_bridge_port *port,
@@ -697,6 +700,8 @@ void br_multicast_uninit_stats(struct net_bridge *br);
void br_multicast_get_stats(const struct net_bridge *br,
const struct net_bridge_port *p,
struct br_mcast_stats *dest);
+void br_mdb_init(void);
+void br_mdb_uninit(void);
#define mlock_dereference(X, br) \
rcu_dereference_protected(X, lockdep_is_held(&br->multicast_lock))
@@ -822,6 +827,15 @@ static inline void br_mdb_uninit(void)
{
}
+static inline int br_mdb_hash_init(struct net_bridge *br)
+{
+ return 0;
+}
+
+static inline void br_mdb_hash_fini(struct net_bridge *br)
+{
+}
+
static inline void br_multicast_count(struct net_bridge *br,
const struct net_bridge_port *p,
const struct sk_buff *skb,
@@ -857,7 +871,7 @@ struct sk_buff *br_handle_vlan(struct net_bridge *br,
struct net_bridge_vlan_group *vg,
struct sk_buff *skb);
int br_vlan_add(struct net_bridge *br, u16 vid, u16 flags,
- bool *changed);
+ bool *changed, struct netlink_ext_ack *extack);
int br_vlan_delete(struct net_bridge *br, u16 vid);
void br_vlan_flush(struct net_bridge *br);
struct net_bridge_vlan *br_vlan_find(struct net_bridge_vlan_group *vg, u16 vid);
@@ -870,12 +884,13 @@ int br_vlan_set_stats(struct net_bridge *br, unsigned long val);
int br_vlan_set_stats_per_port(struct net_bridge *br, unsigned long val);
int br_vlan_init(struct net_bridge *br);
int br_vlan_set_default_pvid(struct net_bridge *br, unsigned long val);
-int __br_vlan_set_default_pvid(struct net_bridge *br, u16 pvid);
+int __br_vlan_set_default_pvid(struct net_bridge *br, u16 pvid,
+ struct netlink_ext_ack *extack);
int nbp_vlan_add(struct net_bridge_port *port, u16 vid, u16 flags,
- bool *changed);
+ bool *changed, struct netlink_ext_ack *extack);
int nbp_vlan_delete(struct net_bridge_port *port, u16 vid);
void nbp_vlan_flush(struct net_bridge_port *port);
-int nbp_vlan_init(struct net_bridge_port *port);
+int nbp_vlan_init(struct net_bridge_port *port, struct netlink_ext_ack *extack);
int nbp_get_num_vlan_infos(struct net_bridge_port *p, u32 filter_mask);
void br_vlan_get_stats(const struct net_bridge_vlan *v,
struct br_vlan_stats *stats);
@@ -912,7 +927,7 @@ static inline int br_vlan_get_tag(const struct sk_buff *skb, u16 *vid)
int err = 0;
if (skb_vlan_tag_present(skb)) {
- *vid = skb_vlan_tag_get(skb) & VLAN_VID_MASK;
+ *vid = skb_vlan_tag_get_id(skb);
} else {
*vid = 0;
err = -EINVAL;
@@ -960,7 +975,7 @@ static inline struct sk_buff *br_handle_vlan(struct net_bridge *br,
}
static inline int br_vlan_add(struct net_bridge *br, u16 vid, u16 flags,
- bool *changed)
+ bool *changed, struct netlink_ext_ack *extack)
{
*changed = false;
return -EOPNOTSUPP;
@@ -985,7 +1000,7 @@ static inline int br_vlan_init(struct net_bridge *br)
}
static inline int nbp_vlan_add(struct net_bridge_port *port, u16 vid, u16 flags,
- bool *changed)
+ bool *changed, struct netlink_ext_ack *extack)
{
*changed = false;
return -EOPNOTSUPP;
@@ -1006,7 +1021,8 @@ static inline struct net_bridge_vlan *br_vlan_find(struct net_bridge_vlan_group
return NULL;
}
-static inline int nbp_vlan_init(struct net_bridge_port *port)
+static inline int nbp_vlan_init(struct net_bridge_port *port,
+ struct netlink_ext_ack *extack)
{
return 0;
}
@@ -1127,7 +1143,8 @@ int br_netlink_init(void);
void br_netlink_fini(void);
void br_ifinfo_notify(int event, const struct net_bridge *br,
const struct net_bridge_port *port);
-int br_setlink(struct net_device *dev, struct nlmsghdr *nlmsg, u16 flags);
+int br_setlink(struct net_device *dev, struct nlmsghdr *nlmsg, u16 flags,
+ struct netlink_ext_ack *extack);
int br_dellink(struct net_device *dev, struct nlmsghdr *nlmsg, u16 flags);
int br_getlink(struct sk_buff *skb, u32 pid, u32 seq, struct net_device *dev,
u32 filter_mask, int nlflags);
@@ -1162,7 +1179,8 @@ int br_switchdev_set_port_flag(struct net_bridge_port *p,
unsigned long mask);
void br_switchdev_fdb_notify(const struct net_bridge_fdb_entry *fdb,
int type);
-int br_switchdev_port_vlan_add(struct net_device *dev, u16 vid, u16 flags);
+int br_switchdev_port_vlan_add(struct net_device *dev, u16 vid, u16 flags,
+ struct netlink_ext_ack *extack);
int br_switchdev_port_vlan_del(struct net_device *dev, u16 vid);
static inline void br_switchdev_frame_unmark(struct sk_buff *skb)
@@ -1194,7 +1212,8 @@ static inline int br_switchdev_set_port_flag(struct net_bridge_port *p,
}
static inline int br_switchdev_port_vlan_add(struct net_device *dev,
- u16 vid, u16 flags)
+ u16 vid, u16 flags,
+ struct netlink_ext_ack *extack)
{
return -EOPNOTSUPP;
}
diff --git a/net/bridge/br_switchdev.c b/net/bridge/br_switchdev.c
index b993df770675..035ff59d9cbd 100644
--- a/net/bridge/br_switchdev.c
+++ b/net/bridge/br_switchdev.c
@@ -140,7 +140,8 @@ br_switchdev_fdb_notify(const struct net_bridge_fdb_entry *fdb, int type)
}
}
-int br_switchdev_port_vlan_add(struct net_device *dev, u16 vid, u16 flags)
+int br_switchdev_port_vlan_add(struct net_device *dev, u16 vid, u16 flags,
+ struct netlink_ext_ack *extack)
{
struct switchdev_obj_port_vlan v = {
.obj.orig_dev = dev,
@@ -150,7 +151,7 @@ int br_switchdev_port_vlan_add(struct net_device *dev, u16 vid, u16 flags)
.vid_end = vid,
};
- return switchdev_port_obj_add(dev, &v.obj);
+ return switchdev_port_obj_add(dev, &v.obj, extack);
}
int br_switchdev_port_vlan_del(struct net_device *dev, u16 vid)
diff --git a/net/bridge/br_sysfs_br.c b/net/bridge/br_sysfs_br.c
index 60182bef6341..b05b94e9c595 100644
--- a/net/bridge/br_sysfs_br.c
+++ b/net/bridge/br_sysfs_br.c
@@ -328,6 +328,27 @@ static ssize_t flush_store(struct device *d,
}
static DEVICE_ATTR_WO(flush);
+static ssize_t no_linklocal_learn_show(struct device *d,
+ struct device_attribute *attr,
+ char *buf)
+{
+ struct net_bridge *br = to_bridge(d);
+ return sprintf(buf, "%d\n", br_boolopt_get(br, BR_BOOLOPT_NO_LL_LEARN));
+}
+
+static int set_no_linklocal_learn(struct net_bridge *br, unsigned long val)
+{
+ return br_boolopt_toggle(br, BR_BOOLOPT_NO_LL_LEARN, !!val, NULL);
+}
+
+static ssize_t no_linklocal_learn_store(struct device *d,
+ struct device_attribute *attr,
+ const char *buf, size_t len)
+{
+ return store_bridge_parm(d, buf, len, set_no_linklocal_learn);
+}
+static DEVICE_ATTR_RW(no_linklocal_learn);
+
#ifdef CONFIG_BRIDGE_IGMP_SNOOPING
static ssize_t multicast_router_show(struct device *d,
struct device_attribute *attr, char *buf)
@@ -403,13 +424,13 @@ static DEVICE_ATTR_RW(multicast_querier);
static ssize_t hash_elasticity_show(struct device *d,
struct device_attribute *attr, char *buf)
{
- struct net_bridge *br = to_bridge(d);
- return sprintf(buf, "%u\n", br->hash_elasticity);
+ return sprintf(buf, "%u\n", RHT_ELASTICITY);
}
static int set_elasticity(struct net_bridge *br, unsigned long val)
{
- br->hash_elasticity = val;
+ br_warn(br, "the hash_elasticity option has been deprecated and is always %u\n",
+ RHT_ELASTICITY);
return 0;
}
@@ -428,10 +449,16 @@ static ssize_t hash_max_show(struct device *d, struct device_attribute *attr,
return sprintf(buf, "%u\n", br->hash_max);
}
+static int set_hash_max(struct net_bridge *br, unsigned long val)
+{
+ br->hash_max = val;
+ return 0;
+}
+
static ssize_t hash_max_store(struct device *d, struct device_attribute *attr,
const char *buf, size_t len)
{
- return store_bridge_parm(d, buf, len, br_multicast_set_hash_max);
+ return store_bridge_parm(d, buf, len, set_hash_max);
}
static DEVICE_ATTR_RW(hash_max);
@@ -841,6 +868,7 @@ static struct attribute *bridge_attrs[] = {
&dev_attr_gc_timer.attr,
&dev_attr_group_addr.attr,
&dev_attr_flush.attr,
+ &dev_attr_no_linklocal_learn.attr,
#ifdef CONFIG_BRIDGE_IGMP_SNOOPING
&dev_attr_multicast_router.attr,
&dev_attr_multicast_snooping.attr,
diff --git a/net/bridge/br_sysfs_if.c b/net/bridge/br_sysfs_if.c
index 7c87a2fe5248..88715edb119a 100644
--- a/net/bridge/br_sysfs_if.c
+++ b/net/bridge/br_sysfs_if.c
@@ -320,9 +320,6 @@ static ssize_t brport_store(struct kobject *kobj,
if (!rtnl_trylock())
return restart_syscall();
- if (!p->dev || !p->br)
- goto out_unlock;
-
if (brport_attr->store_raw) {
char *buf_copy;
diff --git a/net/bridge/br_vlan.c b/net/bridge/br_vlan.c
index e84be08b8285..4a2f31157ef5 100644
--- a/net/bridge/br_vlan.c
+++ b/net/bridge/br_vlan.c
@@ -80,14 +80,14 @@ static bool __vlan_add_flags(struct net_bridge_vlan *v, u16 flags)
}
static int __vlan_vid_add(struct net_device *dev, struct net_bridge *br,
- u16 vid, u16 flags)
+ u16 vid, u16 flags, struct netlink_ext_ack *extack)
{
int err;
/* Try switchdev op first. In case it is not supported, fallback to
* 8021q add.
*/
- err = br_switchdev_port_vlan_add(dev, vid, flags);
+ err = br_switchdev_port_vlan_add(dev, vid, flags, extack);
if (err == -EOPNOTSUPP)
return vlan_vid_add(dev, br->vlan_proto, vid);
return err;
@@ -139,7 +139,9 @@ static int __vlan_vid_del(struct net_device *dev, struct net_bridge *br,
/* Returns a master vlan, if it didn't exist it gets created. In all cases a
* a reference is taken to the master vlan before returning.
*/
-static struct net_bridge_vlan *br_vlan_get_master(struct net_bridge *br, u16 vid)
+static struct net_bridge_vlan *
+br_vlan_get_master(struct net_bridge *br, u16 vid,
+ struct netlink_ext_ack *extack)
{
struct net_bridge_vlan_group *vg;
struct net_bridge_vlan *masterv;
@@ -150,7 +152,7 @@ static struct net_bridge_vlan *br_vlan_get_master(struct net_bridge *br, u16 vid
bool changed;
/* missing global ctx, create it now */
- if (br_vlan_add(br, vid, 0, &changed))
+ if (br_vlan_add(br, vid, 0, &changed, extack))
return NULL;
masterv = br_vlan_find(vg, vid);
if (WARN_ON(!masterv))
@@ -214,7 +216,8 @@ static void nbp_vlan_rcu_free(struct rcu_head *rcu)
* 4. same as 3 but with both master and brentry flags set so the entry
* will be used for filtering in both the port and the bridge
*/
-static int __vlan_add(struct net_bridge_vlan *v, u16 flags)
+static int __vlan_add(struct net_bridge_vlan *v, u16 flags,
+ struct netlink_ext_ack *extack)
{
struct net_bridge_vlan *masterv = NULL;
struct net_bridge_port *p = NULL;
@@ -239,7 +242,7 @@ static int __vlan_add(struct net_bridge_vlan *v, u16 flags)
* This ensures tagged traffic enters the bridge when
* promiscuous mode is disabled by br_manage_promisc().
*/
- err = __vlan_vid_add(dev, br, v->vid, flags);
+ err = __vlan_vid_add(dev, br, v->vid, flags, extack);
if (err)
goto out;
@@ -249,12 +252,12 @@ static int __vlan_add(struct net_bridge_vlan *v, u16 flags)
err = br_vlan_add(br, v->vid,
flags | BRIDGE_VLAN_INFO_BRENTRY,
- &changed);
+ &changed, extack);
if (err)
goto out_filt;
}
- masterv = br_vlan_get_master(br, v->vid);
+ masterv = br_vlan_get_master(br, v->vid, extack);
if (!masterv)
goto out_filt;
v->brvlan = masterv;
@@ -269,7 +272,7 @@ static int __vlan_add(struct net_bridge_vlan *v, u16 flags)
v->stats = masterv->stats;
}
} else {
- err = br_switchdev_port_vlan_add(dev, v->vid, flags);
+ err = br_switchdev_port_vlan_add(dev, v->vid, flags, extack);
if (err && err != -EOPNOTSUPP)
goto out;
}
@@ -421,7 +424,7 @@ struct sk_buff *br_handle_vlan(struct net_bridge *br,
}
if (v->flags & BRIDGE_VLAN_INFO_UNTAGGED)
- skb->vlan_tci = 0;
+ __vlan_hwaccel_clear_tag(skb);
if (p && (p->flags & BR_VLAN_TUNNEL) &&
br_handle_egress_vlan_tunnel(skb, v)) {
@@ -494,8 +497,8 @@ static bool __allowed_ingress(const struct net_bridge *br,
__vlan_hwaccel_put_tag(skb, br->vlan_proto, pvid);
else
/* Priority-tagged Frame.
- * At this point, We know that skb->vlan_tci had
- * VLAN_TAG_PRESENT bit and its VID field was 0x000.
+ * At this point, we know that skb->vlan_tci VID
+ * field was 0.
* We update only VID field and preserve PCP field.
*/
skb->vlan_tci |= pvid;
@@ -591,11 +594,12 @@ bool br_should_learn(struct net_bridge_port *p, struct sk_buff *skb, u16 *vid)
static int br_vlan_add_existing(struct net_bridge *br,
struct net_bridge_vlan_group *vg,
struct net_bridge_vlan *vlan,
- u16 flags, bool *changed)
+ u16 flags, bool *changed,
+ struct netlink_ext_ack *extack)
{
int err;
- err = br_switchdev_port_vlan_add(br->dev, vlan->vid, flags);
+ err = br_switchdev_port_vlan_add(br->dev, vlan->vid, flags, extack);
if (err && err != -EOPNOTSUPP)
return err;
@@ -634,7 +638,8 @@ err_flags:
* Must be called with vid in range from 1 to 4094 inclusive.
* changed must be true only if the vlan was created or updated
*/
-int br_vlan_add(struct net_bridge *br, u16 vid, u16 flags, bool *changed)
+int br_vlan_add(struct net_bridge *br, u16 vid, u16 flags, bool *changed,
+ struct netlink_ext_ack *extack)
{
struct net_bridge_vlan_group *vg;
struct net_bridge_vlan *vlan;
@@ -646,7 +651,8 @@ int br_vlan_add(struct net_bridge *br, u16 vid, u16 flags, bool *changed)
vg = br_vlan_group(br);
vlan = br_vlan_find(vg, vid);
if (vlan)
- return br_vlan_add_existing(br, vg, vlan, flags, changed);
+ return br_vlan_add_existing(br, vg, vlan, flags, changed,
+ extack);
vlan = kzalloc(sizeof(*vlan), GFP_KERNEL);
if (!vlan)
@@ -663,7 +669,7 @@ int br_vlan_add(struct net_bridge *br, u16 vid, u16 flags, bool *changed)
vlan->br = br;
if (flags & BRIDGE_VLAN_INFO_BRENTRY)
refcount_set(&vlan->refcnt, 1);
- ret = __vlan_add(vlan, flags);
+ ret = __vlan_add(vlan, flags, extack);
if (ret) {
free_percpu(vlan->stats);
kfree(vlan);
@@ -914,7 +920,8 @@ static void br_vlan_disable_default_pvid(struct net_bridge *br)
br->default_pvid = 0;
}
-int __br_vlan_set_default_pvid(struct net_bridge *br, u16 pvid)
+int __br_vlan_set_default_pvid(struct net_bridge *br, u16 pvid,
+ struct netlink_ext_ack *extack)
{
const struct net_bridge_vlan *pvent;
struct net_bridge_vlan_group *vg;
@@ -946,7 +953,7 @@ int __br_vlan_set_default_pvid(struct net_bridge *br, u16 pvid)
BRIDGE_VLAN_INFO_PVID |
BRIDGE_VLAN_INFO_UNTAGGED |
BRIDGE_VLAN_INFO_BRENTRY,
- &vlchange);
+ &vlchange, extack);
if (err)
goto out;
br_vlan_delete(br, old_pvid);
@@ -966,7 +973,7 @@ int __br_vlan_set_default_pvid(struct net_bridge *br, u16 pvid)
err = nbp_vlan_add(p, pvid,
BRIDGE_VLAN_INFO_PVID |
BRIDGE_VLAN_INFO_UNTAGGED,
- &vlchange);
+ &vlchange, extack);
if (err)
goto err_port;
nbp_vlan_delete(p, old_pvid);
@@ -988,7 +995,7 @@ err_port:
nbp_vlan_add(p, old_pvid,
BRIDGE_VLAN_INFO_PVID |
BRIDGE_VLAN_INFO_UNTAGGED,
- &vlchange);
+ &vlchange, NULL);
nbp_vlan_delete(p, pvid);
}
@@ -998,7 +1005,7 @@ err_port:
BRIDGE_VLAN_INFO_PVID |
BRIDGE_VLAN_INFO_UNTAGGED |
BRIDGE_VLAN_INFO_BRENTRY,
- &vlchange);
+ &vlchange, NULL);
br_vlan_delete(br, pvid);
}
goto out;
@@ -1021,7 +1028,7 @@ int br_vlan_set_default_pvid(struct net_bridge *br, unsigned long val)
err = -EPERM;
goto out;
}
- err = __br_vlan_set_default_pvid(br, pvid);
+ err = __br_vlan_set_default_pvid(br, pvid, NULL);
out:
return err;
}
@@ -1047,7 +1054,7 @@ int br_vlan_init(struct net_bridge *br)
rcu_assign_pointer(br->vlgrp, vg);
ret = br_vlan_add(br, 1,
BRIDGE_VLAN_INFO_PVID | BRIDGE_VLAN_INFO_UNTAGGED |
- BRIDGE_VLAN_INFO_BRENTRY, &changed);
+ BRIDGE_VLAN_INFO_BRENTRY, &changed, NULL);
if (ret)
goto err_vlan_add;
@@ -1064,7 +1071,7 @@ err_rhtbl:
goto out;
}
-int nbp_vlan_init(struct net_bridge_port *p)
+int nbp_vlan_init(struct net_bridge_port *p, struct netlink_ext_ack *extack)
{
struct switchdev_attr attr = {
.orig_dev = p->br->dev,
@@ -1097,7 +1104,7 @@ int nbp_vlan_init(struct net_bridge_port *p)
ret = nbp_vlan_add(p, p->br->default_pvid,
BRIDGE_VLAN_INFO_PVID |
BRIDGE_VLAN_INFO_UNTAGGED,
- &changed);
+ &changed, extack);
if (ret)
goto err_vlan_add;
}
@@ -1122,7 +1129,7 @@ err_vlan_enabled:
* changed must be true only if the vlan was created or updated
*/
int nbp_vlan_add(struct net_bridge_port *port, u16 vid, u16 flags,
- bool *changed)
+ bool *changed, struct netlink_ext_ack *extack)
{
struct net_bridge_vlan *vlan;
int ret;
@@ -1133,7 +1140,7 @@ int nbp_vlan_add(struct net_bridge_port *port, u16 vid, u16 flags,
vlan = br_vlan_find(nbp_vlan_group(port), vid);
if (vlan) {
/* Pass the flags to the hardware bridge */
- ret = br_switchdev_port_vlan_add(port->dev, vid, flags);
+ ret = br_switchdev_port_vlan_add(port->dev, vid, flags, extack);
if (ret && ret != -EOPNOTSUPP)
return ret;
*changed = __vlan_add_flags(vlan, flags);
@@ -1147,7 +1154,7 @@ int nbp_vlan_add(struct net_bridge_port *port, u16 vid, u16 flags,
vlan->vid = vid;
vlan->port = port;
- ret = __vlan_add(vlan, flags);
+ ret = __vlan_add(vlan, flags, extack);
if (ret)
kfree(vlan);
else
@@ -1217,9 +1224,13 @@ void br_vlan_get_stats(const struct net_bridge_vlan *v,
int br_vlan_get_pvid(const struct net_device *dev, u16 *p_pvid)
{
struct net_bridge_vlan_group *vg;
+ struct net_bridge_port *p;
ASSERT_RTNL();
- if (netif_is_bridge_master(dev))
+ p = br_port_get_check_rtnl(dev);
+ if (p)
+ vg = nbp_vlan_group(p);
+ else if (netif_is_bridge_master(dev))
vg = br_vlan_group(netdev_priv(dev));
else
return -EINVAL;
diff --git a/net/core/datagram.c b/net/core/datagram.c
index 57f3a6fcfc1e..4bf62b1afa3b 100644
--- a/net/core/datagram.c
+++ b/net/core/datagram.c
@@ -728,49 +728,6 @@ fault:
return -EFAULT;
}
-__sum16 __skb_checksum_complete_head(struct sk_buff *skb, int len)
-{
- __sum16 sum;
-
- sum = csum_fold(skb_checksum(skb, 0, len, skb->csum));
- if (likely(!sum)) {
- if (unlikely(skb->ip_summed == CHECKSUM_COMPLETE) &&
- !skb->csum_complete_sw)
- netdev_rx_csum_fault(skb->dev);
- }
- if (!skb_shared(skb))
- skb->csum_valid = !sum;
- return sum;
-}
-EXPORT_SYMBOL(__skb_checksum_complete_head);
-
-__sum16 __skb_checksum_complete(struct sk_buff *skb)
-{
- __wsum csum;
- __sum16 sum;
-
- csum = skb_checksum(skb, 0, skb->len, 0);
-
- /* skb->csum holds pseudo checksum */
- sum = csum_fold(csum_add(skb->csum, csum));
- if (likely(!sum)) {
- if (unlikely(skb->ip_summed == CHECKSUM_COMPLETE) &&
- !skb->csum_complete_sw)
- netdev_rx_csum_fault(skb->dev);
- }
-
- if (!skb_shared(skb)) {
- /* Save full packet checksum */
- skb->csum = csum;
- skb->ip_summed = CHECKSUM_COMPLETE;
- skb->csum_complete_sw = 1;
- skb->csum_valid = !sum;
- }
-
- return sum;
-}
-EXPORT_SYMBOL(__skb_checksum_complete);
-
/**
* skb_copy_and_csum_datagram_msg - Copy and checksum skb to user iovec.
* @skb: skbuff
@@ -810,7 +767,7 @@ int skb_copy_and_csum_datagram_msg(struct sk_buff *skb,
if (unlikely(skb->ip_summed == CHECKSUM_COMPLETE) &&
!skb->csum_complete_sw)
- netdev_rx_csum_fault(NULL);
+ netdev_rx_csum_fault(NULL, skb);
}
return 0;
fault:
diff --git a/net/core/dev.c b/net/core/dev.c
index 722d50dbf8a4..1b5a4410be0e 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -145,6 +145,7 @@
#include <linux/sctp.h>
#include <net/udp_tunnel.h>
#include <linux/net_namespace.h>
+#include <linux/indirect_call_wrapper.h>
#include "net-sysfs.h"
@@ -162,6 +163,9 @@ static struct list_head offload_base __read_mostly;
static int netif_rx_internal(struct sk_buff *skb);
static int call_netdevice_notifiers_info(unsigned long val,
struct netdev_notifier_info *info);
+static int call_netdevice_notifiers_extack(unsigned long val,
+ struct net_device *dev,
+ struct netlink_ext_ack *extack);
static struct napi_struct *napi_by_id(unsigned int napi_id);
/*
@@ -1361,7 +1365,7 @@ void netdev_notify_peers(struct net_device *dev)
}
EXPORT_SYMBOL(netdev_notify_peers);
-static int __dev_open(struct net_device *dev)
+static int __dev_open(struct net_device *dev, struct netlink_ext_ack *extack)
{
const struct net_device_ops *ops = dev->netdev_ops;
int ret;
@@ -1377,7 +1381,7 @@ static int __dev_open(struct net_device *dev)
*/
netpoll_poll_disable(dev);
- ret = call_netdevice_notifiers(NETDEV_PRE_UP, dev);
+ ret = call_netdevice_notifiers_extack(NETDEV_PRE_UP, dev, extack);
ret = notifier_to_errno(ret);
if (ret)
return ret;
@@ -1406,7 +1410,8 @@ static int __dev_open(struct net_device *dev)
/**
* dev_open - prepare an interface for use.
- * @dev: device to open
+ * @dev: device to open
+ * @extack: netlink extended ack
*
* Takes a device from down to up state. The device's private open
* function is invoked and then the multicast lists are loaded. Finally
@@ -1416,14 +1421,14 @@ static int __dev_open(struct net_device *dev)
* Calling this function on an active interface is a nop. On a failure
* a negative errno code is returned.
*/
-int dev_open(struct net_device *dev)
+int dev_open(struct net_device *dev, struct netlink_ext_ack *extack)
{
int ret;
if (dev->flags & IFF_UP)
return 0;
- ret = __dev_open(dev);
+ ret = __dev_open(dev, extack);
if (ret < 0)
return ret;
@@ -1585,6 +1590,7 @@ const char *netdev_cmd_to_name(enum netdev_cmd cmd)
N(UDP_TUNNEL_DROP_INFO) N(CHANGE_TX_QUEUE_LEN)
N(CVLAN_FILTER_PUSH_INFO) N(CVLAN_FILTER_DROP_INFO)
N(SVLAN_FILTER_PUSH_INFO) N(SVLAN_FILTER_DROP_INFO)
+ N(PRE_CHANGEADDR)
}
#undef N
return "UNKNOWN_NETDEV_EVENT";
@@ -1733,6 +1739,18 @@ static int call_netdevice_notifiers_info(unsigned long val,
return raw_notifier_call_chain(&netdev_chain, val, info);
}
+static int call_netdevice_notifiers_extack(unsigned long val,
+ struct net_device *dev,
+ struct netlink_ext_ack *extack)
+{
+ struct netdev_notifier_info info = {
+ .dev = dev,
+ .extack = extack,
+ };
+
+ return call_netdevice_notifiers_info(val, &info);
+}
+
/**
* call_netdevice_notifiers - call all network notifier blocks
* @val: value passed unmodified to notifier function
@@ -1744,11 +1762,7 @@ static int call_netdevice_notifiers_info(unsigned long val,
int call_netdevice_notifiers(unsigned long val, struct net_device *dev)
{
- struct netdev_notifier_info info = {
- .dev = dev,
- };
-
- return call_netdevice_notifiers_info(val, &info);
+ return call_netdevice_notifiers_extack(val, dev, NULL);
}
EXPORT_SYMBOL(call_netdevice_notifiers);
@@ -3096,10 +3110,17 @@ EXPORT_SYMBOL(__skb_gso_segment);
/* Take action when hardware reception checksum errors are detected. */
#ifdef CONFIG_BUG
-void netdev_rx_csum_fault(struct net_device *dev)
+void netdev_rx_csum_fault(struct net_device *dev, struct sk_buff *skb)
{
if (net_ratelimit()) {
pr_err("%s: hw csum failure\n", dev ? dev->name : "<unknown>");
+ if (dev)
+ pr_err("dev features: %pNF\n", &dev->features);
+ pr_err("skb len=%u data_len=%u pkt_type=%u gso_size=%u gso_type=%u nr_frags=%u ip_summed=%u csum=%x csum_complete_sw=%d csum_valid=%d csum_level=%u\n",
+ skb->len, skb->data_len, skb->pkt_type,
+ skb_shinfo(skb)->gso_size, skb_shinfo(skb)->gso_type,
+ skb_shinfo(skb)->nr_frags, skb->ip_summed, skb->csum,
+ skb->csum_complete_sw, skb->csum_valid, skb->csum_level);
dump_stack();
}
}
@@ -4525,9 +4546,14 @@ static int netif_rx_internal(struct sk_buff *skb)
int netif_rx(struct sk_buff *skb)
{
+ int ret;
+
trace_netif_rx_entry(skb);
- return netif_rx_internal(skb);
+ ret = netif_rx_internal(skb);
+ trace_netif_rx_exit(ret);
+
+ return ret;
}
EXPORT_SYMBOL(netif_rx);
@@ -4542,6 +4568,7 @@ int netif_rx_ni(struct sk_buff *skb)
if (local_softirq_pending())
do_softirq();
preempt_enable();
+ trace_netif_rx_ni_exit(err);
return err;
}
@@ -4894,7 +4921,7 @@ skip_classify:
* and set skb->priority like in vlan_do_receive()
* For the time being, just ignore Priority Code Point
*/
- skb->vlan_tci = 0;
+ __vlan_hwaccel_clear_tag(skb);
}
type = skb->protocol;
@@ -5227,9 +5254,14 @@ static void netif_receive_skb_list_internal(struct list_head *head)
*/
int netif_receive_skb(struct sk_buff *skb)
{
+ int ret;
+
trace_netif_receive_skb_entry(skb);
- return netif_receive_skb_internal(skb);
+ ret = netif_receive_skb_internal(skb);
+ trace_netif_receive_skb_exit(ret);
+
+ return ret;
}
EXPORT_SYMBOL(netif_receive_skb);
@@ -5249,9 +5281,12 @@ void netif_receive_skb_list(struct list_head *head)
if (list_empty(head))
return;
- list_for_each_entry(skb, head, list)
- trace_netif_receive_skb_list_entry(skb);
+ if (trace_netif_receive_skb_list_entry_enabled()) {
+ list_for_each_entry(skb, head, list)
+ trace_netif_receive_skb_list_entry(skb);
+ }
netif_receive_skb_list_internal(head);
+ trace_netif_receive_skb_list_exit(0);
}
EXPORT_SYMBOL(netif_receive_skb_list);
@@ -5304,6 +5339,8 @@ static void flush_all_backlogs(void)
put_online_cpus();
}
+INDIRECT_CALLABLE_DECLARE(int inet_gro_complete(struct sk_buff *, int));
+INDIRECT_CALLABLE_DECLARE(int ipv6_gro_complete(struct sk_buff *, int));
static int napi_gro_complete(struct sk_buff *skb)
{
struct packet_offload *ptype;
@@ -5323,7 +5360,9 @@ static int napi_gro_complete(struct sk_buff *skb)
if (ptype->type != type || !ptype->callbacks.gro_complete)
continue;
- err = ptype->callbacks.gro_complete(skb, 0);
+ err = INDIRECT_CALL_INET(ptype->callbacks.gro_complete,
+ ipv6_gro_complete, inet_gro_complete,
+ skb, 0);
break;
}
rcu_read_unlock();
@@ -5362,11 +5401,13 @@ static void __napi_gro_flush_chain(struct napi_struct *napi, u32 index,
*/
void napi_gro_flush(struct napi_struct *napi, bool flush_old)
{
- u32 i;
+ unsigned long bitmask = napi->gro_bitmask;
+ unsigned int i, base = ~0U;
- for (i = 0; i < GRO_HASH_BUCKETS; i++) {
- if (test_bit(i, &napi->gro_bitmask))
- __napi_gro_flush_chain(napi, i, flush_old);
+ while ((i = ffs(bitmask)) != 0) {
+ bitmask >>= i;
+ base += i;
+ __napi_gro_flush_chain(napi, base, flush_old);
}
}
EXPORT_SYMBOL(napi_gro_flush);
@@ -5391,7 +5432,9 @@ static struct list_head *gro_list_prepare(struct napi_struct *napi,
}
diffs = (unsigned long)p->dev ^ (unsigned long)skb->dev;
- diffs |= p->vlan_tci ^ skb->vlan_tci;
+ diffs |= skb_vlan_tag_present(p) ^ skb_vlan_tag_present(skb);
+ if (skb_vlan_tag_present(p))
+ diffs |= p->vlan_tci ^ skb->vlan_tci;
diffs |= skb_metadata_dst_cmp(p, skb);
diffs |= skb_metadata_differs(p, skb);
if (maclen == ETH_HLEN)
@@ -5466,6 +5509,10 @@ static void gro_flush_oldest(struct list_head *head)
napi_gro_complete(oldest);
}
+INDIRECT_CALLABLE_DECLARE(struct sk_buff *inet_gro_receive(struct list_head *,
+ struct sk_buff *));
+INDIRECT_CALLABLE_DECLARE(struct sk_buff *ipv6_gro_receive(struct list_head *,
+ struct sk_buff *));
static enum gro_result dev_gro_receive(struct napi_struct *napi, struct sk_buff *skb)
{
u32 hash = skb_get_hash_raw(skb) & (GRO_HASH_BUCKETS - 1);
@@ -5515,7 +5562,9 @@ static enum gro_result dev_gro_receive(struct napi_struct *napi, struct sk_buff
NAPI_GRO_CB(skb)->csum_valid = 0;
}
- pp = ptype->callbacks.gro_receive(gro_head, skb);
+ pp = INDIRECT_CALL_INET(ptype->callbacks.gro_receive,
+ ipv6_gro_receive, inet_gro_receive,
+ gro_head, skb);
break;
}
rcu_read_unlock();
@@ -5639,12 +5688,17 @@ static gro_result_t napi_skb_finish(gro_result_t ret, struct sk_buff *skb)
gro_result_t napi_gro_receive(struct napi_struct *napi, struct sk_buff *skb)
{
+ gro_result_t ret;
+
skb_mark_napi_id(skb, napi);
trace_napi_gro_receive_entry(skb);
skb_gro_reset_offset(skb);
- return napi_skb_finish(dev_gro_receive(napi, skb), skb);
+ ret = napi_skb_finish(dev_gro_receive(napi, skb), skb);
+ trace_napi_gro_receive_exit(ret);
+
+ return ret;
}
EXPORT_SYMBOL(napi_gro_receive);
@@ -5657,7 +5711,7 @@ static void napi_reuse_skb(struct napi_struct *napi, struct sk_buff *skb)
__skb_pull(skb, skb_headlen(skb));
/* restore the reserve we had after netdev_alloc_skb_ip_align() */
skb_reserve(skb, NET_SKB_PAD + NET_IP_ALIGN - skb_headroom(skb));
- skb->vlan_tci = 0;
+ __vlan_hwaccel_clear_tag(skb);
skb->dev = napi->dev;
skb->skb_iif = 0;
@@ -5762,6 +5816,7 @@ static struct sk_buff *napi_frags_skb(struct napi_struct *napi)
gro_result_t napi_gro_frags(struct napi_struct *napi)
{
+ gro_result_t ret;
struct sk_buff *skb = napi_frags_skb(napi);
if (!skb)
@@ -5769,7 +5824,10 @@ gro_result_t napi_gro_frags(struct napi_struct *napi)
trace_napi_gro_frags_entry(skb);
- return napi_frags_finish(napi, skb, dev_gro_receive(napi, skb));
+ ret = napi_frags_finish(napi, skb, dev_gro_receive(napi, skb));
+ trace_napi_gro_frags_exit(ret);
+
+ return ret;
}
EXPORT_SYMBOL(napi_gro_frags);
@@ -5785,10 +5843,11 @@ __sum16 __skb_gro_checksum_complete(struct sk_buff *skb)
/* NAPI_GRO_CB(skb)->csum holds pseudo checksum */
sum = csum_fold(csum_add(NAPI_GRO_CB(skb)->csum, wsum));
+ /* See comments in __skb_checksum_complete(). */
if (likely(!sum)) {
if (unlikely(skb->ip_summed == CHECKSUM_COMPLETE) &&
!skb->csum_complete_sw)
- netdev_rx_csum_fault(skb->dev);
+ netdev_rx_csum_fault(skb->dev, skb);
}
NAPI_GRO_CB(skb)->csum = wsum;
@@ -7467,7 +7526,8 @@ unsigned int dev_get_flags(const struct net_device *dev)
}
EXPORT_SYMBOL(dev_get_flags);
-int __dev_change_flags(struct net_device *dev, unsigned int flags)
+int __dev_change_flags(struct net_device *dev, unsigned int flags,
+ struct netlink_ext_ack *extack)
{
unsigned int old_flags = dev->flags;
int ret;
@@ -7504,7 +7564,7 @@ int __dev_change_flags(struct net_device *dev, unsigned int flags)
if (old_flags & IFF_UP)
__dev_close(dev);
else
- ret = __dev_open(dev);
+ ret = __dev_open(dev, extack);
}
if ((flags ^ dev->gflags) & IFF_PROMISC) {
@@ -7564,16 +7624,18 @@ void __dev_notify_flags(struct net_device *dev, unsigned int old_flags,
* dev_change_flags - change device settings
* @dev: device
* @flags: device state flags
+ * @extack: netlink extended ack
*
* Change settings on device based state flags. The flags are
* in the userspace exported format.
*/
-int dev_change_flags(struct net_device *dev, unsigned int flags)
+int dev_change_flags(struct net_device *dev, unsigned int flags,
+ struct netlink_ext_ack *extack)
{
int ret;
unsigned int changes, old_flags = dev->flags, old_gflags = dev->gflags;
- ret = __dev_change_flags(dev, flags);
+ ret = __dev_change_flags(dev, flags, extack);
if (ret < 0)
return ret;
@@ -7706,13 +7768,36 @@ void dev_set_group(struct net_device *dev, int new_group)
EXPORT_SYMBOL(dev_set_group);
/**
+ * dev_pre_changeaddr_notify - Call NETDEV_PRE_CHANGEADDR.
+ * @dev: device
+ * @addr: new address
+ * @extack: netlink extended ack
+ */
+int dev_pre_changeaddr_notify(struct net_device *dev, const char *addr,
+ struct netlink_ext_ack *extack)
+{
+ struct netdev_notifier_pre_changeaddr_info info = {
+ .info.dev = dev,
+ .info.extack = extack,
+ .dev_addr = addr,
+ };
+ int rc;
+
+ rc = call_netdevice_notifiers_info(NETDEV_PRE_CHANGEADDR, &info.info);
+ return notifier_to_errno(rc);
+}
+EXPORT_SYMBOL(dev_pre_changeaddr_notify);
+
+/**
* dev_set_mac_address - Change Media Access Control Address
* @dev: device
* @sa: new address
+ * @extack: netlink extended ack
*
* Change the hardware (MAC) address of the device
*/
-int dev_set_mac_address(struct net_device *dev, struct sockaddr *sa)
+int dev_set_mac_address(struct net_device *dev, struct sockaddr *sa,
+ struct netlink_ext_ack *extack)
{
const struct net_device_ops *ops = dev->netdev_ops;
int err;
@@ -7723,6 +7808,9 @@ int dev_set_mac_address(struct net_device *dev, struct sockaddr *sa)
return -EINVAL;
if (!netif_device_present(dev))
return -ENODEV;
+ err = dev_pre_changeaddr_notify(dev, sa->sa_data, extack);
+ if (err)
+ return err;
err = ops->ndo_set_mac_address(dev, sa);
if (err)
return err;
diff --git a/net/core/dev_addr_lists.c b/net/core/dev_addr_lists.c
index d884d8f5f0e5..a6723b306717 100644
--- a/net/core/dev_addr_lists.c
+++ b/net/core/dev_addr_lists.c
@@ -278,6 +278,103 @@ int __hw_addr_sync_dev(struct netdev_hw_addr_list *list,
EXPORT_SYMBOL(__hw_addr_sync_dev);
/**
+ * __hw_addr_ref_sync_dev - Synchronize device's multicast address list taking
+ * into account references
+ * @list: address list to synchronize
+ * @dev: device to sync
+ * @sync: function to call if address or reference on it should be added
+ * @unsync: function to call if address or some reference on it should removed
+ *
+ * This function is intended to be called from the ndo_set_rx_mode
+ * function of devices that require explicit address or references on it
+ * add/remove notifications. The unsync function may be NULL in which case
+ * the addresses or references on it requiring removal will simply be
+ * removed without any notification to the device. That is responsibility of
+ * the driver to identify and distribute address or references on it between
+ * internal address tables.
+ **/
+int __hw_addr_ref_sync_dev(struct netdev_hw_addr_list *list,
+ struct net_device *dev,
+ int (*sync)(struct net_device *,
+ const unsigned char *, int),
+ int (*unsync)(struct net_device *,
+ const unsigned char *, int))
+{
+ struct netdev_hw_addr *ha, *tmp;
+ int err, ref_cnt;
+
+ /* first go through and flush out any unsynced/stale entries */
+ list_for_each_entry_safe(ha, tmp, &list->list, list) {
+ /* sync if address is not used */
+ if ((ha->sync_cnt << 1) <= ha->refcount)
+ continue;
+
+ /* if fails defer unsyncing address */
+ ref_cnt = ha->refcount - ha->sync_cnt;
+ if (unsync && unsync(dev, ha->addr, ref_cnt))
+ continue;
+
+ ha->refcount = (ref_cnt << 1) + 1;
+ ha->sync_cnt = ref_cnt;
+ __hw_addr_del_entry(list, ha, false, false);
+ }
+
+ /* go through and sync updated/new entries to the list */
+ list_for_each_entry_safe(ha, tmp, &list->list, list) {
+ /* sync if address added or reused */
+ if ((ha->sync_cnt << 1) >= ha->refcount)
+ continue;
+
+ ref_cnt = ha->refcount - ha->sync_cnt;
+ err = sync(dev, ha->addr, ref_cnt);
+ if (err)
+ return err;
+
+ ha->refcount = ref_cnt << 1;
+ ha->sync_cnt = ref_cnt;
+ }
+
+ return 0;
+}
+EXPORT_SYMBOL(__hw_addr_ref_sync_dev);
+
+/**
+ * __hw_addr_ref_unsync_dev - Remove synchronized addresses and references on
+ * it from device
+ * @list: address list to remove synchronized addresses (references on it) from
+ * @dev: device to sync
+ * @unsync: function to call if address and references on it should be removed
+ *
+ * Remove all addresses that were added to the device by
+ * __hw_addr_ref_sync_dev(). This function is intended to be called from the
+ * ndo_stop or ndo_open functions on devices that require explicit address (or
+ * references on it) add/remove notifications. If the unsync function pointer
+ * is NULL then this function can be used to just reset the sync_cnt for the
+ * addresses in the list.
+ **/
+void __hw_addr_ref_unsync_dev(struct netdev_hw_addr_list *list,
+ struct net_device *dev,
+ int (*unsync)(struct net_device *,
+ const unsigned char *, int))
+{
+ struct netdev_hw_addr *ha, *tmp;
+
+ list_for_each_entry_safe(ha, tmp, &list->list, list) {
+ if (!ha->sync_cnt)
+ continue;
+
+ /* if fails defer unsyncing address */
+ if (unsync && unsync(dev, ha->addr, ha->sync_cnt))
+ continue;
+
+ ha->refcount -= ha->sync_cnt - 1;
+ ha->sync_cnt = 0;
+ __hw_addr_del_entry(list, ha, false, false);
+ }
+}
+EXPORT_SYMBOL(__hw_addr_ref_unsync_dev);
+
+/**
* __hw_addr_unsync_dev - Remove synchronized addresses from device
* @list: address list to remove synchronized addresses from
* @dev: device to sync
@@ -401,6 +498,9 @@ int dev_addr_add(struct net_device *dev, const unsigned char *addr,
ASSERT_RTNL();
+ err = dev_pre_changeaddr_notify(dev, addr, NULL);
+ if (err)
+ return err;
err = __hw_addr_add(&dev->dev_addrs, addr, dev->addr_len, addr_type);
if (!err)
call_netdevice_notifiers(NETDEV_CHANGEADDR, dev);
diff --git a/net/core/dev_ioctl.c b/net/core/dev_ioctl.c
index 90e8aa36881e..31380fd5a4e2 100644
--- a/net/core/dev_ioctl.c
+++ b/net/core/dev_ioctl.c
@@ -234,7 +234,7 @@ static int dev_ifsioc(struct net *net, struct ifreq *ifr, unsigned int cmd)
switch (cmd) {
case SIOCSIFFLAGS: /* Set interface flags */
- return dev_change_flags(dev, ifr->ifr_flags);
+ return dev_change_flags(dev, ifr->ifr_flags, NULL);
case SIOCSIFMETRIC: /* Set the metric on the interface
(currently unused) */
@@ -246,7 +246,7 @@ static int dev_ifsioc(struct net *net, struct ifreq *ifr, unsigned int cmd)
case SIOCSIFHWADDR:
if (dev->addr_len > sizeof(struct sockaddr))
return -EINVAL;
- return dev_set_mac_address(dev, &ifr->ifr_hwaddr);
+ return dev_set_mac_address(dev, &ifr->ifr_hwaddr, NULL);
case SIOCSIFHWBROADCAST:
if (ifr->ifr_hwaddr.sa_family != dev->type)
diff --git a/net/core/devlink.c b/net/core/devlink.c
index 3a4b29a13d31..abb0da9d7b4b 100644
--- a/net/core/devlink.c
+++ b/net/core/devlink.c
@@ -2692,6 +2692,11 @@ static const struct devlink_param devlink_param_generic[] = {
.name = DEVLINK_PARAM_GENERIC_MSIX_VEC_PER_PF_MIN_NAME,
.type = DEVLINK_PARAM_GENERIC_MSIX_VEC_PER_PF_MIN_TYPE,
},
+ {
+ .id = DEVLINK_PARAM_GENERIC_ID_FW_LOAD_POLICY,
+ .name = DEVLINK_PARAM_GENERIC_FW_LOAD_POLICY_NAME,
+ .type = DEVLINK_PARAM_GENERIC_FW_LOAD_POLICY_TYPE,
+ },
};
static int devlink_param_generic_verify(const struct devlink_param *param)
diff --git a/net/core/filter.c b/net/core/filter.c
index 8d2c629501e2..447dd1bad31f 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -296,22 +296,18 @@ static u32 convert_skb_access(int skb_field, int dst_reg, int src_reg,
break;
case SKF_AD_VLAN_TAG:
- case SKF_AD_VLAN_TAG_PRESENT:
BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, vlan_tci) != 2);
- BUILD_BUG_ON(VLAN_TAG_PRESENT != 0x1000);
/* dst_reg = *(u16 *) (src_reg + offsetof(vlan_tci)) */
*insn++ = BPF_LDX_MEM(BPF_H, dst_reg, src_reg,
offsetof(struct sk_buff, vlan_tci));
- if (skb_field == SKF_AD_VLAN_TAG) {
- *insn++ = BPF_ALU32_IMM(BPF_AND, dst_reg,
- ~VLAN_TAG_PRESENT);
- } else {
- /* dst_reg >>= 12 */
- *insn++ = BPF_ALU32_IMM(BPF_RSH, dst_reg, 12);
- /* dst_reg &= 1 */
+ break;
+ case SKF_AD_VLAN_TAG_PRESENT:
+ *insn++ = BPF_LDX_MEM(BPF_B, dst_reg, src_reg, PKT_VLAN_PRESENT_OFFSET());
+ if (PKT_VLAN_PRESENT_BIT)
+ *insn++ = BPF_ALU32_IMM(BPF_RSH, dst_reg, PKT_VLAN_PRESENT_BIT);
+ if (PKT_VLAN_PRESENT_BIT < 7)
*insn++ = BPF_ALU32_IMM(BPF_AND, dst_reg, 1);
- }
break;
}
@@ -467,7 +463,8 @@ static bool convert_bpf_ld_abs(struct sock_filter *fp, struct bpf_insn **insnp)
bool ldx_off_ok = offset <= S16_MAX;
*insn++ = BPF_MOV64_REG(BPF_REG_TMP, BPF_REG_H);
- *insn++ = BPF_ALU64_IMM(BPF_SUB, BPF_REG_TMP, offset);
+ if (offset)
+ *insn++ = BPF_ALU64_IMM(BPF_SUB, BPF_REG_TMP, offset);
*insn++ = BPF_JMP_IMM(BPF_JSLT, BPF_REG_TMP,
size, 2 + endian + (!ldx_off_ok * 2));
if (ldx_off_ok) {
@@ -2428,6 +2425,174 @@ static const struct bpf_func_proto bpf_msg_push_data_proto = {
.arg4_type = ARG_ANYTHING,
};
+static void sk_msg_shift_left(struct sk_msg *msg, int i)
+{
+ int prev;
+
+ do {
+ prev = i;
+ sk_msg_iter_var_next(i);
+ msg->sg.data[prev] = msg->sg.data[i];
+ } while (i != msg->sg.end);
+
+ sk_msg_iter_prev(msg, end);
+}
+
+static void sk_msg_shift_right(struct sk_msg *msg, int i)
+{
+ struct scatterlist tmp, sge;
+
+ sk_msg_iter_next(msg, end);
+ sge = sk_msg_elem_cpy(msg, i);
+ sk_msg_iter_var_next(i);
+ tmp = sk_msg_elem_cpy(msg, i);
+
+ while (i != msg->sg.end) {
+ msg->sg.data[i] = sge;
+ sk_msg_iter_var_next(i);
+ sge = tmp;
+ tmp = sk_msg_elem_cpy(msg, i);
+ }
+}
+
+BPF_CALL_4(bpf_msg_pop_data, struct sk_msg *, msg, u32, start,
+ u32, len, u64, flags)
+{
+ u32 i = 0, l, space, offset = 0;
+ u64 last = start + len;
+ int pop;
+
+ if (unlikely(flags))
+ return -EINVAL;
+
+ /* First find the starting scatterlist element */
+ i = msg->sg.start;
+ do {
+ l = sk_msg_elem(msg, i)->length;
+
+ if (start < offset + l)
+ break;
+ offset += l;
+ sk_msg_iter_var_next(i);
+ } while (i != msg->sg.end);
+
+ /* Bounds checks: start and pop must be inside message */
+ if (start >= offset + l || last >= msg->sg.size)
+ return -EINVAL;
+
+ space = MAX_MSG_FRAGS - sk_msg_elem_used(msg);
+
+ pop = len;
+ /* --------------| offset
+ * -| start |-------- len -------|
+ *
+ * |----- a ----|-------- pop -------|----- b ----|
+ * |______________________________________________| length
+ *
+ *
+ * a: region at front of scatter element to save
+ * b: region at back of scatter element to save when length > A + pop
+ * pop: region to pop from element, same as input 'pop' here will be
+ * decremented below per iteration.
+ *
+ * Two top-level cases to handle when start != offset, first B is non
+ * zero and second B is zero corresponding to when a pop includes more
+ * than one element.
+ *
+ * Then if B is non-zero AND there is no space allocate space and
+ * compact A, B regions into page. If there is space shift ring to
+ * the rigth free'ing the next element in ring to place B, leaving
+ * A untouched except to reduce length.
+ */
+ if (start != offset) {
+ struct scatterlist *nsge, *sge = sk_msg_elem(msg, i);
+ int a = start;
+ int b = sge->length - pop - a;
+
+ sk_msg_iter_var_next(i);
+
+ if (pop < sge->length - a) {
+ if (space) {
+ sge->length = a;
+ sk_msg_shift_right(msg, i);
+ nsge = sk_msg_elem(msg, i);
+ get_page(sg_page(sge));
+ sg_set_page(nsge,
+ sg_page(sge),
+ b, sge->offset + pop + a);
+ } else {
+ struct page *page, *orig;
+ u8 *to, *from;
+
+ page = alloc_pages(__GFP_NOWARN |
+ __GFP_COMP | GFP_ATOMIC,
+ get_order(a + b));
+ if (unlikely(!page))
+ return -ENOMEM;
+
+ sge->length = a;
+ orig = sg_page(sge);
+ from = sg_virt(sge);
+ to = page_address(page);
+ memcpy(to, from, a);
+ memcpy(to + a, from + a + pop, b);
+ sg_set_page(sge, page, a + b, 0);
+ put_page(orig);
+ }
+ pop = 0;
+ } else if (pop >= sge->length - a) {
+ sge->length = a;
+ pop -= (sge->length - a);
+ }
+ }
+
+ /* From above the current layout _must_ be as follows,
+ *
+ * -| offset
+ * -| start
+ *
+ * |---- pop ---|---------------- b ------------|
+ * |____________________________________________| length
+ *
+ * Offset and start of the current msg elem are equal because in the
+ * previous case we handled offset != start and either consumed the
+ * entire element and advanced to the next element OR pop == 0.
+ *
+ * Two cases to handle here are first pop is less than the length
+ * leaving some remainder b above. Simply adjust the element's layout
+ * in this case. Or pop >= length of the element so that b = 0. In this
+ * case advance to next element decrementing pop.
+ */
+ while (pop) {
+ struct scatterlist *sge = sk_msg_elem(msg, i);
+
+ if (pop < sge->length) {
+ sge->length -= pop;
+ sge->offset += pop;
+ pop = 0;
+ } else {
+ pop -= sge->length;
+ sk_msg_shift_left(msg, i);
+ }
+ sk_msg_iter_var_next(i);
+ }
+
+ sk_mem_uncharge(msg->sk, len - pop);
+ msg->sg.size -= (len - pop);
+ sk_msg_compute_data_pointers(msg);
+ return 0;
+}
+
+static const struct bpf_func_proto bpf_msg_pop_data_proto = {
+ .func = bpf_msg_pop_data,
+ .gpl_only = false,
+ .ret_type = RET_INTEGER,
+ .arg1_type = ARG_PTR_TO_CTX,
+ .arg2_type = ARG_ANYTHING,
+ .arg3_type = ARG_ANYTHING,
+ .arg4_type = ARG_ANYTHING,
+};
+
BPF_CALL_1(bpf_get_cgroup_classid, const struct sk_buff *, skb)
{
return task_get_classid(skb);
@@ -3908,6 +4073,26 @@ static const struct bpf_func_proto bpf_get_socket_uid_proto = {
.arg1_type = ARG_PTR_TO_CTX,
};
+BPF_CALL_5(bpf_sockopt_event_output, struct bpf_sock_ops_kern *, bpf_sock,
+ struct bpf_map *, map, u64, flags, void *, data, u64, size)
+{
+ if (unlikely(flags & ~(BPF_F_INDEX_MASK)))
+ return -EINVAL;
+
+ return bpf_event_output(map, flags, data, size, NULL, 0, NULL);
+}
+
+static const struct bpf_func_proto bpf_sockopt_event_output_proto = {
+ .func = bpf_sockopt_event_output,
+ .gpl_only = true,
+ .ret_type = RET_INTEGER,
+ .arg1_type = ARG_PTR_TO_CTX,
+ .arg2_type = ARG_CONST_MAP_PTR,
+ .arg3_type = ARG_ANYTHING,
+ .arg4_type = ARG_PTR_TO_MEM,
+ .arg5_type = ARG_CONST_SIZE_OR_ZERO,
+};
+
BPF_CALL_5(bpf_setsockopt, struct bpf_sock_ops_kern *, bpf_sock,
int, level, int, optname, char *, optval, int, optlen)
{
@@ -4825,37 +5010,31 @@ static const struct bpf_func_proto bpf_lwt_seg6_adjust_srh_proto = {
#ifdef CONFIG_INET
static struct sock *sk_lookup(struct net *net, struct bpf_sock_tuple *tuple,
- struct sk_buff *skb, u8 family, u8 proto)
+ int dif, int sdif, u8 family, u8 proto)
{
bool refcounted = false;
struct sock *sk = NULL;
- int dif = 0;
-
- if (skb->dev)
- dif = skb->dev->ifindex;
if (family == AF_INET) {
__be32 src4 = tuple->ipv4.saddr;
__be32 dst4 = tuple->ipv4.daddr;
- int sdif = inet_sdif(skb);
if (proto == IPPROTO_TCP)
- sk = __inet_lookup(net, &tcp_hashinfo, skb, 0,
+ sk = __inet_lookup(net, &tcp_hashinfo, NULL, 0,
src4, tuple->ipv4.sport,
dst4, tuple->ipv4.dport,
dif, sdif, &refcounted);
else
sk = __udp4_lib_lookup(net, src4, tuple->ipv4.sport,
dst4, tuple->ipv4.dport,
- dif, sdif, &udp_table, skb);
+ dif, sdif, &udp_table, NULL);
#if IS_ENABLED(CONFIG_IPV6)
} else {
struct in6_addr *src6 = (struct in6_addr *)&tuple->ipv6.saddr;
struct in6_addr *dst6 = (struct in6_addr *)&tuple->ipv6.daddr;
- int sdif = inet6_sdif(skb);
if (proto == IPPROTO_TCP)
- sk = __inet6_lookup(net, &tcp_hashinfo, skb, 0,
+ sk = __inet6_lookup(net, &tcp_hashinfo, NULL, 0,
src6, tuple->ipv6.sport,
dst6, ntohs(tuple->ipv6.dport),
dif, sdif, &refcounted);
@@ -4864,7 +5043,7 @@ static struct sock *sk_lookup(struct net *net, struct bpf_sock_tuple *tuple,
src6, tuple->ipv6.sport,
dst6, tuple->ipv6.dport,
dif, sdif,
- &udp_table, skb);
+ &udp_table, NULL);
#endif
}
@@ -4881,31 +5060,33 @@ static struct sock *sk_lookup(struct net *net, struct bpf_sock_tuple *tuple,
* callers to satisfy BPF_CALL declarations.
*/
static unsigned long
-bpf_sk_lookup(struct sk_buff *skb, struct bpf_sock_tuple *tuple, u32 len,
- u8 proto, u64 netns_id, u64 flags)
+__bpf_sk_lookup(struct sk_buff *skb, struct bpf_sock_tuple *tuple, u32 len,
+ struct net *caller_net, u32 ifindex, u8 proto, u64 netns_id,
+ u64 flags)
{
- struct net *caller_net;
struct sock *sk = NULL;
u8 family = AF_UNSPEC;
struct net *net;
+ int sdif;
family = len == sizeof(tuple->ipv4) ? AF_INET : AF_INET6;
if (unlikely(family == AF_UNSPEC || flags ||
!((s32)netns_id < 0 || netns_id <= S32_MAX)))
goto out;
- if (skb->dev)
- caller_net = dev_net(skb->dev);
+ if (family == AF_INET)
+ sdif = inet_sdif(skb);
else
- caller_net = sock_net(skb->sk);
+ sdif = inet6_sdif(skb);
+
if ((s32)netns_id < 0) {
net = caller_net;
- sk = sk_lookup(net, tuple, skb, family, proto);
+ sk = sk_lookup(net, tuple, ifindex, sdif, family, proto);
} else {
net = get_net_ns_by_id(caller_net, netns_id);
if (unlikely(!net))
goto out;
- sk = sk_lookup(net, tuple, skb, family, proto);
+ sk = sk_lookup(net, tuple, ifindex, sdif, family, proto);
put_net(net);
}
@@ -4915,6 +5096,25 @@ out:
return (unsigned long) sk;
}
+static unsigned long
+bpf_sk_lookup(struct sk_buff *skb, struct bpf_sock_tuple *tuple, u32 len,
+ u8 proto, u64 netns_id, u64 flags)
+{
+ struct net *caller_net;
+ int ifindex;
+
+ if (skb->dev) {
+ caller_net = dev_net(skb->dev);
+ ifindex = skb->dev->ifindex;
+ } else {
+ caller_net = sock_net(skb->sk);
+ ifindex = 0;
+ }
+
+ return __bpf_sk_lookup(skb, tuple, len, caller_net, ifindex,
+ proto, netns_id, flags);
+}
+
BPF_CALL_5(bpf_sk_lookup_tcp, struct sk_buff *, skb,
struct bpf_sock_tuple *, tuple, u32, len, u64, netns_id, u64, flags)
{
@@ -4964,6 +5164,87 @@ static const struct bpf_func_proto bpf_sk_release_proto = {
.ret_type = RET_INTEGER,
.arg1_type = ARG_PTR_TO_SOCKET,
};
+
+BPF_CALL_5(bpf_xdp_sk_lookup_udp, struct xdp_buff *, ctx,
+ struct bpf_sock_tuple *, tuple, u32, len, u32, netns_id, u64, flags)
+{
+ struct net *caller_net = dev_net(ctx->rxq->dev);
+ int ifindex = ctx->rxq->dev->ifindex;
+
+ return __bpf_sk_lookup(NULL, tuple, len, caller_net, ifindex,
+ IPPROTO_UDP, netns_id, flags);
+}
+
+static const struct bpf_func_proto bpf_xdp_sk_lookup_udp_proto = {
+ .func = bpf_xdp_sk_lookup_udp,
+ .gpl_only = false,
+ .pkt_access = true,
+ .ret_type = RET_PTR_TO_SOCKET_OR_NULL,
+ .arg1_type = ARG_PTR_TO_CTX,
+ .arg2_type = ARG_PTR_TO_MEM,
+ .arg3_type = ARG_CONST_SIZE,
+ .arg4_type = ARG_ANYTHING,
+ .arg5_type = ARG_ANYTHING,
+};
+
+BPF_CALL_5(bpf_xdp_sk_lookup_tcp, struct xdp_buff *, ctx,
+ struct bpf_sock_tuple *, tuple, u32, len, u32, netns_id, u64, flags)
+{
+ struct net *caller_net = dev_net(ctx->rxq->dev);
+ int ifindex = ctx->rxq->dev->ifindex;
+
+ return __bpf_sk_lookup(NULL, tuple, len, caller_net, ifindex,
+ IPPROTO_TCP, netns_id, flags);
+}
+
+static const struct bpf_func_proto bpf_xdp_sk_lookup_tcp_proto = {
+ .func = bpf_xdp_sk_lookup_tcp,
+ .gpl_only = false,
+ .pkt_access = true,
+ .ret_type = RET_PTR_TO_SOCKET_OR_NULL,
+ .arg1_type = ARG_PTR_TO_CTX,
+ .arg2_type = ARG_PTR_TO_MEM,
+ .arg3_type = ARG_CONST_SIZE,
+ .arg4_type = ARG_ANYTHING,
+ .arg5_type = ARG_ANYTHING,
+};
+
+BPF_CALL_5(bpf_sock_addr_sk_lookup_tcp, struct bpf_sock_addr_kern *, ctx,
+ struct bpf_sock_tuple *, tuple, u32, len, u64, netns_id, u64, flags)
+{
+ return __bpf_sk_lookup(NULL, tuple, len, sock_net(ctx->sk), 0,
+ IPPROTO_TCP, netns_id, flags);
+}
+
+static const struct bpf_func_proto bpf_sock_addr_sk_lookup_tcp_proto = {
+ .func = bpf_sock_addr_sk_lookup_tcp,
+ .gpl_only = false,
+ .ret_type = RET_PTR_TO_SOCKET_OR_NULL,
+ .arg1_type = ARG_PTR_TO_CTX,
+ .arg2_type = ARG_PTR_TO_MEM,
+ .arg3_type = ARG_CONST_SIZE,
+ .arg4_type = ARG_ANYTHING,
+ .arg5_type = ARG_ANYTHING,
+};
+
+BPF_CALL_5(bpf_sock_addr_sk_lookup_udp, struct bpf_sock_addr_kern *, ctx,
+ struct bpf_sock_tuple *, tuple, u32, len, u64, netns_id, u64, flags)
+{
+ return __bpf_sk_lookup(NULL, tuple, len, sock_net(ctx->sk), 0,
+ IPPROTO_UDP, netns_id, flags);
+}
+
+static const struct bpf_func_proto bpf_sock_addr_sk_lookup_udp_proto = {
+ .func = bpf_sock_addr_sk_lookup_udp,
+ .gpl_only = false,
+ .ret_type = RET_PTR_TO_SOCKET_OR_NULL,
+ .arg1_type = ARG_PTR_TO_CTX,
+ .arg2_type = ARG_PTR_TO_MEM,
+ .arg3_type = ARG_CONST_SIZE,
+ .arg4_type = ARG_ANYTHING,
+ .arg5_type = ARG_ANYTHING,
+};
+
#endif /* CONFIG_INET */
bool bpf_helper_changes_pkt_data(void *func)
@@ -4986,6 +5267,7 @@ bool bpf_helper_changes_pkt_data(void *func)
func == bpf_xdp_adjust_meta ||
func == bpf_msg_pull_data ||
func == bpf_msg_push_data ||
+ func == bpf_msg_pop_data ||
func == bpf_xdp_adjust_tail ||
#if IS_ENABLED(CONFIG_IPV6_SEG6_BPF)
func == bpf_lwt_seg6_store_bytes ||
@@ -5070,6 +5352,14 @@ sock_addr_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
return &bpf_get_socket_cookie_sock_addr_proto;
case BPF_FUNC_get_local_storage:
return &bpf_get_local_storage_proto;
+#ifdef CONFIG_INET
+ case BPF_FUNC_sk_lookup_tcp:
+ return &bpf_sock_addr_sk_lookup_tcp_proto;
+ case BPF_FUNC_sk_lookup_udp:
+ return &bpf_sock_addr_sk_lookup_udp_proto;
+ case BPF_FUNC_sk_release:
+ return &bpf_sk_release_proto;
+#endif /* CONFIG_INET */
default:
return bpf_base_func_proto(func_id);
}
@@ -5214,6 +5504,14 @@ xdp_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
return &bpf_xdp_adjust_tail_proto;
case BPF_FUNC_fib_lookup:
return &bpf_xdp_fib_lookup_proto;
+#ifdef CONFIG_INET
+ case BPF_FUNC_sk_lookup_udp:
+ return &bpf_xdp_sk_lookup_udp_proto;
+ case BPF_FUNC_sk_lookup_tcp:
+ return &bpf_xdp_sk_lookup_tcp_proto;
+ case BPF_FUNC_sk_release:
+ return &bpf_sk_release_proto;
+#endif
default:
return bpf_base_func_proto(func_id);
}
@@ -5240,6 +5538,8 @@ sock_ops_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
return &bpf_get_socket_cookie_sock_ops_proto;
case BPF_FUNC_get_local_storage:
return &bpf_get_local_storage_proto;
+ case BPF_FUNC_perf_event_output:
+ return &bpf_sockopt_event_output_proto;
default:
return bpf_base_func_proto(func_id);
}
@@ -5264,6 +5564,8 @@ sk_msg_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
return &bpf_msg_pull_data_proto;
case BPF_FUNC_msg_push_data:
return &bpf_msg_push_data_proto;
+ case BPF_FUNC_msg_pop_data:
+ return &bpf_msg_pop_data_proto;
default:
return bpf_base_func_proto(func_id);
}
@@ -5440,6 +5742,10 @@ static bool bpf_skb_is_valid_access(int off, int size, enum bpf_access_type type
if (size != sizeof(__u64))
return false;
break;
+ case bpf_ctx_range(struct __sk_buff, tstamp):
+ if (size != sizeof(__u64))
+ return false;
+ break;
default:
/* Only narrow read access allowed for now. */
if (type == BPF_WRITE) {
@@ -5467,6 +5773,8 @@ static bool sk_filter_is_valid_access(int off, int size,
case bpf_ctx_range(struct __sk_buff, data_end):
case bpf_ctx_range_ptr(struct __sk_buff, flow_keys):
case bpf_ctx_range_till(struct __sk_buff, family, local_port):
+ case bpf_ctx_range(struct __sk_buff, tstamp):
+ case bpf_ctx_range(struct __sk_buff, wire_len):
return false;
}
@@ -5491,6 +5799,7 @@ static bool cg_skb_is_valid_access(int off, int size,
case bpf_ctx_range(struct __sk_buff, tc_classid):
case bpf_ctx_range(struct __sk_buff, data_meta):
case bpf_ctx_range_ptr(struct __sk_buff, flow_keys):
+ case bpf_ctx_range(struct __sk_buff, wire_len):
return false;
case bpf_ctx_range(struct __sk_buff, data):
case bpf_ctx_range(struct __sk_buff, data_end):
@@ -5505,6 +5814,10 @@ static bool cg_skb_is_valid_access(int off, int size,
case bpf_ctx_range(struct __sk_buff, priority):
case bpf_ctx_range_till(struct __sk_buff, cb[0], cb[4]):
break;
+ case bpf_ctx_range(struct __sk_buff, tstamp):
+ if (!capable(CAP_SYS_ADMIN))
+ return false;
+ break;
default:
return false;
}
@@ -5532,6 +5845,8 @@ static bool lwt_is_valid_access(int off, int size,
case bpf_ctx_range_till(struct __sk_buff, family, local_port):
case bpf_ctx_range(struct __sk_buff, data_meta):
case bpf_ctx_range_ptr(struct __sk_buff, flow_keys):
+ case bpf_ctx_range(struct __sk_buff, tstamp):
+ case bpf_ctx_range(struct __sk_buff, wire_len):
return false;
}
@@ -5741,6 +6056,7 @@ static bool tc_cls_act_is_valid_access(int off, int size,
case bpf_ctx_range(struct __sk_buff, priority):
case bpf_ctx_range(struct __sk_buff, tc_classid):
case bpf_ctx_range_till(struct __sk_buff, cb[0], cb[4]):
+ case bpf_ctx_range(struct __sk_buff, tstamp):
break;
default:
return false;
@@ -5960,6 +6276,8 @@ static bool sk_skb_is_valid_access(int off, int size,
case bpf_ctx_range(struct __sk_buff, tc_classid):
case bpf_ctx_range(struct __sk_buff, data_meta):
case bpf_ctx_range_ptr(struct __sk_buff, flow_keys):
+ case bpf_ctx_range(struct __sk_buff, tstamp):
+ case bpf_ctx_range(struct __sk_buff, wire_len):
return false;
}
@@ -5995,6 +6313,9 @@ static bool sk_msg_is_valid_access(int off, int size,
if (type == BPF_WRITE)
return false;
+ if (off % size != 0)
+ return false;
+
switch (off) {
case offsetof(struct sk_msg_md, data):
info->reg_type = PTR_TO_PACKET;
@@ -6006,16 +6327,20 @@ static bool sk_msg_is_valid_access(int off, int size,
if (size != sizeof(__u64))
return false;
break;
- default:
+ case bpf_ctx_range(struct sk_msg_md, family):
+ case bpf_ctx_range(struct sk_msg_md, remote_ip4):
+ case bpf_ctx_range(struct sk_msg_md, local_ip4):
+ case bpf_ctx_range_till(struct sk_msg_md, remote_ip6[0], remote_ip6[3]):
+ case bpf_ctx_range_till(struct sk_msg_md, local_ip6[0], local_ip6[3]):
+ case bpf_ctx_range(struct sk_msg_md, remote_port):
+ case bpf_ctx_range(struct sk_msg_md, local_port):
+ case bpf_ctx_range(struct sk_msg_md, size):
if (size != sizeof(__u32))
return false;
- }
-
- if (off < 0 || off >= sizeof(struct sk_msg_md))
- return false;
- if (off % size != 0)
+ break;
+ default:
return false;
-
+ }
return true;
}
@@ -6046,6 +6371,8 @@ static bool flow_dissector_is_valid_access(int off, int size,
case bpf_ctx_range(struct __sk_buff, tc_classid):
case bpf_ctx_range(struct __sk_buff, data_meta):
case bpf_ctx_range_till(struct __sk_buff, family, local_port):
+ case bpf_ctx_range(struct __sk_buff, tstamp):
+ case bpf_ctx_range(struct __sk_buff, wire_len):
return false;
}
@@ -6140,19 +6467,19 @@ static u32 bpf_convert_ctx_access(enum bpf_access_type type,
break;
case offsetof(struct __sk_buff, vlan_present):
- case offsetof(struct __sk_buff, vlan_tci):
- BUILD_BUG_ON(VLAN_TAG_PRESENT != 0x1000);
+ *target_size = 1;
+ *insn++ = BPF_LDX_MEM(BPF_B, si->dst_reg, si->src_reg,
+ PKT_VLAN_PRESENT_OFFSET());
+ if (PKT_VLAN_PRESENT_BIT)
+ *insn++ = BPF_ALU32_IMM(BPF_RSH, si->dst_reg, PKT_VLAN_PRESENT_BIT);
+ if (PKT_VLAN_PRESENT_BIT < 7)
+ *insn++ = BPF_ALU32_IMM(BPF_AND, si->dst_reg, 1);
+ break;
+ case offsetof(struct __sk_buff, vlan_tci):
*insn++ = BPF_LDX_MEM(BPF_H, si->dst_reg, si->src_reg,
bpf_target_off(struct sk_buff, vlan_tci, 2,
target_size));
- if (si->off == offsetof(struct __sk_buff, vlan_tci)) {
- *insn++ = BPF_ALU32_IMM(BPF_AND, si->dst_reg,
- ~VLAN_TAG_PRESENT);
- } else {
- *insn++ = BPF_ALU32_IMM(BPF_RSH, si->dst_reg, 12);
- *insn++ = BPF_ALU32_IMM(BPF_AND, si->dst_reg, 1);
- }
break;
case offsetof(struct __sk_buff, cb[0]) ...
@@ -6355,6 +6682,33 @@ static u32 bpf_convert_ctx_access(enum bpf_access_type type,
*insn++ = BPF_LDX_MEM(BPF_SIZEOF(void *), si->dst_reg,
si->src_reg, off);
break;
+
+ case offsetof(struct __sk_buff, tstamp):
+ BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, tstamp) != 8);
+
+ if (type == BPF_WRITE)
+ *insn++ = BPF_STX_MEM(BPF_DW,
+ si->dst_reg, si->src_reg,
+ bpf_target_off(struct sk_buff,
+ tstamp, 8,
+ target_size));
+ else
+ *insn++ = BPF_LDX_MEM(BPF_DW,
+ si->dst_reg, si->src_reg,
+ bpf_target_off(struct sk_buff,
+ tstamp, 8,
+ target_size));
+ break;
+
+ case offsetof(struct __sk_buff, wire_len):
+ BUILD_BUG_ON(FIELD_SIZEOF(struct qdisc_skb_cb, pkt_len) != 4);
+
+ off = si->off;
+ off -= offsetof(struct __sk_buff, wire_len);
+ off += offsetof(struct sk_buff, cb);
+ off += offsetof(struct qdisc_skb_cb, pkt_len);
+ *target_size = 4;
+ *insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->src_reg, off);
}
return insn - insn_buf;
@@ -7071,6 +7425,9 @@ static u32 sk_msg_convert_ctx_access(enum bpf_access_type type,
int off;
#endif
+ /* convert ctx uses the fact sg element is first in struct */
+ BUILD_BUG_ON(offsetof(struct sk_msg, sg) != 0);
+
switch (si->off) {
case offsetof(struct sk_msg_md, data):
*insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct sk_msg, data),
@@ -7183,6 +7540,12 @@ static u32 sk_msg_convert_ctx_access(enum bpf_access_type type,
*insn++ = BPF_LDX_MEM(BPF_H, si->dst_reg, si->dst_reg,
offsetof(struct sock_common, skc_num));
break;
+
+ case offsetof(struct sk_msg_md, size):
+ *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct sk_msg_sg, size),
+ si->dst_reg, si->src_reg,
+ offsetof(struct sk_msg_sg, size));
+ break;
}
return insn - insn_buf;
diff --git a/net/core/flow_dissector.c b/net/core/flow_dissector.c
index af68207ee56c..9f2840510e63 100644
--- a/net/core/flow_dissector.c
+++ b/net/core/flow_dissector.c
@@ -956,8 +956,7 @@ proto_again:
if (!vlan) {
key_vlan->vlan_id = skb_vlan_tag_get_id(skb);
- key_vlan->vlan_priority =
- (skb_vlan_tag_get_prio(skb) >> VLAN_PRIO_SHIFT);
+ key_vlan->vlan_priority = skb_vlan_tag_get_prio(skb);
} else {
key_vlan->vlan_id = ntohs(vlan->h_vlan_TCI) &
VLAN_VID_MASK;
diff --git a/net/core/neighbour.c b/net/core/neighbour.c
index 5fa32c064baf..763a7b08df67 100644
--- a/net/core/neighbour.c
+++ b/net/core/neighbour.c
@@ -118,21 +118,77 @@ unsigned long neigh_rand_reach_time(unsigned long base)
}
EXPORT_SYMBOL(neigh_rand_reach_time);
+static void neigh_mark_dead(struct neighbour *n)
+{
+ n->dead = 1;
+ if (!list_empty(&n->gc_list)) {
+ list_del_init(&n->gc_list);
+ atomic_dec(&n->tbl->gc_entries);
+ }
+}
+
+static void neigh_update_gc_list(struct neighbour *n)
+{
+ bool on_gc_list, exempt_from_gc;
+
+ write_lock_bh(&n->tbl->lock);
+ write_lock(&n->lock);
+
+ /* remove from the gc list if new state is permanent or if neighbor
+ * is externally learned; otherwise entry should be on the gc list
+ */
+ exempt_from_gc = n->nud_state & NUD_PERMANENT ||
+ n->flags & NTF_EXT_LEARNED;
+ on_gc_list = !list_empty(&n->gc_list);
+
+ if (exempt_from_gc && on_gc_list) {
+ list_del_init(&n->gc_list);
+ atomic_dec(&n->tbl->gc_entries);
+ } else if (!exempt_from_gc && !on_gc_list) {
+ /* add entries to the tail; cleaning removes from the front */
+ list_add_tail(&n->gc_list, &n->tbl->gc_list);
+ atomic_inc(&n->tbl->gc_entries);
+ }
+
+ write_unlock(&n->lock);
+ write_unlock_bh(&n->tbl->lock);
+}
-static bool neigh_del(struct neighbour *n, __u8 state, __u8 flags,
- struct neighbour __rcu **np, struct neigh_table *tbl)
+static bool neigh_update_ext_learned(struct neighbour *neigh, u32 flags,
+ int *notify)
+{
+ bool rc = false;
+ u8 ndm_flags;
+
+ if (!(flags & NEIGH_UPDATE_F_ADMIN))
+ return rc;
+
+ ndm_flags = (flags & NEIGH_UPDATE_F_EXT_LEARNED) ? NTF_EXT_LEARNED : 0;
+ if ((neigh->flags ^ ndm_flags) & NTF_EXT_LEARNED) {
+ if (ndm_flags & NTF_EXT_LEARNED)
+ neigh->flags |= NTF_EXT_LEARNED;
+ else
+ neigh->flags &= ~NTF_EXT_LEARNED;
+ rc = true;
+ *notify = 1;
+ }
+
+ return rc;
+}
+
+static bool neigh_del(struct neighbour *n, struct neighbour __rcu **np,
+ struct neigh_table *tbl)
{
bool retval = false;
write_lock(&n->lock);
- if (refcount_read(&n->refcnt) == 1 && !(n->nud_state & state) &&
- !(n->flags & flags)) {
+ if (refcount_read(&n->refcnt) == 1) {
struct neighbour *neigh;
neigh = rcu_dereference_protected(n->next,
lockdep_is_held(&tbl->lock));
rcu_assign_pointer(*np, neigh);
- n->dead = 1;
+ neigh_mark_dead(n);
retval = true;
}
write_unlock(&n->lock);
@@ -158,7 +214,7 @@ bool neigh_remove_one(struct neighbour *ndel, struct neigh_table *tbl)
while ((n = rcu_dereference_protected(*np,
lockdep_is_held(&tbl->lock)))) {
if (n == ndel)
- return neigh_del(n, 0, 0, np, tbl);
+ return neigh_del(n, np, tbl);
np = &n->next;
}
return false;
@@ -166,32 +222,29 @@ bool neigh_remove_one(struct neighbour *ndel, struct neigh_table *tbl)
static int neigh_forced_gc(struct neigh_table *tbl)
{
+ int max_clean = atomic_read(&tbl->gc_entries) - tbl->gc_thresh2;
+ unsigned long tref = jiffies - 5 * HZ;
+ struct neighbour *n, *tmp;
int shrunk = 0;
- int i;
- struct neigh_hash_table *nht;
NEIGH_CACHE_STAT_INC(tbl, forced_gc_runs);
write_lock_bh(&tbl->lock);
- nht = rcu_dereference_protected(tbl->nht,
- lockdep_is_held(&tbl->lock));
- for (i = 0; i < (1 << nht->hash_shift); i++) {
- struct neighbour *n;
- struct neighbour __rcu **np;
- np = &nht->hash_buckets[i];
- while ((n = rcu_dereference_protected(*np,
- lockdep_is_held(&tbl->lock))) != NULL) {
- /* Neighbour record may be discarded if:
- * - nobody refers to it.
- * - it is not permanent
- */
- if (neigh_del(n, NUD_PERMANENT, NTF_EXT_LEARNED, np,
- tbl)) {
- shrunk = 1;
- continue;
- }
- np = &n->next;
+ list_for_each_entry_safe(n, tmp, &tbl->gc_list, gc_list) {
+ if (refcount_read(&n->refcnt) == 1) {
+ bool remove = false;
+
+ write_lock(&n->lock);
+ if ((n->nud_state == NUD_FAILED) ||
+ time_after(tref, n->updated))
+ remove = true;
+ write_unlock(&n->lock);
+
+ if (remove && neigh_remove_one(n, tbl))
+ shrunk++;
+ if (shrunk >= max_clean)
+ break;
}
}
@@ -260,8 +313,7 @@ static void neigh_flush_dev(struct neigh_table *tbl, struct net_device *dev,
lockdep_is_held(&tbl->lock)));
write_lock(&n->lock);
neigh_del_timer(n);
- n->dead = 1;
-
+ neigh_mark_dead(n);
if (refcount_read(&n->refcnt) != 1) {
/* The most unpleasant situation.
We must destroy neighbour entry,
@@ -321,13 +373,18 @@ int neigh_ifdown(struct neigh_table *tbl, struct net_device *dev)
}
EXPORT_SYMBOL(neigh_ifdown);
-static struct neighbour *neigh_alloc(struct neigh_table *tbl, struct net_device *dev)
+static struct neighbour *neigh_alloc(struct neigh_table *tbl,
+ struct net_device *dev,
+ bool exempt_from_gc)
{
struct neighbour *n = NULL;
unsigned long now = jiffies;
int entries;
- entries = atomic_inc_return(&tbl->entries) - 1;
+ if (exempt_from_gc)
+ goto do_alloc;
+
+ entries = atomic_inc_return(&tbl->gc_entries) - 1;
if (entries >= tbl->gc_thresh3 ||
(entries >= tbl->gc_thresh2 &&
time_after(now, tbl->last_flush + 5 * HZ))) {
@@ -340,6 +397,7 @@ static struct neighbour *neigh_alloc(struct neigh_table *tbl, struct net_device
}
}
+do_alloc:
n = kzalloc(tbl->entry_size + dev->neigh_priv_len, GFP_ATOMIC);
if (!n)
goto out_entries;
@@ -358,11 +416,15 @@ static struct neighbour *neigh_alloc(struct neigh_table *tbl, struct net_device
n->tbl = tbl;
refcount_set(&n->refcnt, 1);
n->dead = 1;
+ INIT_LIST_HEAD(&n->gc_list);
+
+ atomic_inc(&tbl->entries);
out:
return n;
out_entries:
- atomic_dec(&tbl->entries);
+ if (!exempt_from_gc)
+ atomic_dec(&tbl->gc_entries);
goto out;
}
@@ -505,13 +567,15 @@ struct neighbour *neigh_lookup_nodev(struct neigh_table *tbl, struct net *net,
}
EXPORT_SYMBOL(neigh_lookup_nodev);
-struct neighbour *__neigh_create(struct neigh_table *tbl, const void *pkey,
- struct net_device *dev, bool want_ref)
+static struct neighbour *___neigh_create(struct neigh_table *tbl,
+ const void *pkey,
+ struct net_device *dev,
+ bool exempt_from_gc, bool want_ref)
{
+ struct neighbour *n1, *rc, *n = neigh_alloc(tbl, dev, exempt_from_gc);
u32 hash_val;
unsigned int key_len = tbl->key_len;
int error;
- struct neighbour *n1, *rc, *n = neigh_alloc(tbl, dev);
struct neigh_hash_table *nht;
if (!n) {
@@ -574,6 +638,9 @@ struct neighbour *__neigh_create(struct neigh_table *tbl, const void *pkey,
}
n->dead = 0;
+ if (!exempt_from_gc)
+ list_add_tail(&n->gc_list, &n->tbl->gc_list);
+
if (want_ref)
neigh_hold(n);
rcu_assign_pointer(n->next,
@@ -591,6 +658,12 @@ out_neigh_release:
neigh_release(n);
goto out;
}
+
+struct neighbour *__neigh_create(struct neigh_table *tbl, const void *pkey,
+ struct net_device *dev, bool want_ref)
+{
+ return ___neigh_create(tbl, pkey, dev, false, want_ref);
+}
EXPORT_SYMBOL(__neigh_create);
static u32 pneigh_hash(const void *pkey, unsigned int key_len)
@@ -652,6 +725,7 @@ struct pneigh_entry * pneigh_lookup(struct neigh_table *tbl,
if (!n)
goto out;
+ n->protocol = 0;
write_pnet(&n->net, net);
memcpy(n->key, pkey, key_len);
n->dev = dev;
@@ -854,7 +928,7 @@ static void neigh_periodic_work(struct work_struct *work)
(state == NUD_FAILED ||
time_after(jiffies, n->used + NEIGH_VAR(n->parms, GC_STALETIME)))) {
*np = n->next;
- n->dead = 1;
+ neigh_mark_dead(n);
write_unlock(&n->lock);
neigh_cleanup_and_release(n);
continue;
@@ -1137,9 +1211,11 @@ static void neigh_update_hhs(struct neighbour *neigh)
Caller MUST hold reference count on the entry.
*/
-int neigh_update(struct neighbour *neigh, const u8 *lladdr, u8 new,
- u32 flags, u32 nlmsg_pid)
+static int __neigh_update(struct neighbour *neigh, const u8 *lladdr,
+ u8 new, u32 flags, u32 nlmsg_pid,
+ struct netlink_ext_ack *extack)
{
+ bool ext_learn_change = false;
u8 old;
int err;
int notify = 0;
@@ -1155,10 +1231,12 @@ int neigh_update(struct neighbour *neigh, const u8 *lladdr, u8 new,
if (!(flags & NEIGH_UPDATE_F_ADMIN) &&
(old & (NUD_NOARP | NUD_PERMANENT)))
goto out;
- if (neigh->dead)
+ if (neigh->dead) {
+ NL_SET_ERR_MSG(extack, "Neighbor entry is now dead");
goto out;
+ }
- neigh_update_ext_learned(neigh, flags, &notify);
+ ext_learn_change = neigh_update_ext_learned(neigh, flags, &notify);
if (!(new & NUD_VALID)) {
neigh_del_timer(neigh);
@@ -1193,8 +1271,10 @@ int neigh_update(struct neighbour *neigh, const u8 *lladdr, u8 new,
use it, otherwise discard the request.
*/
err = -EINVAL;
- if (!(old & NUD_VALID))
+ if (!(old & NUD_VALID)) {
+ NL_SET_ERR_MSG(extack, "No link layer address given");
goto out;
+ }
lladdr = neigh->ha;
}
@@ -1302,11 +1382,20 @@ out:
neigh_update_is_router(neigh, flags, &notify);
write_unlock_bh(&neigh->lock);
+ if (((new ^ old) & NUD_PERMANENT) || ext_learn_change)
+ neigh_update_gc_list(neigh);
+
if (notify)
neigh_update_notify(neigh, nlmsg_pid);
return err;
}
+
+int neigh_update(struct neighbour *neigh, const u8 *lladdr, u8 new,
+ u32 flags, u32 nlmsg_pid)
+{
+ return __neigh_update(neigh, lladdr, new, flags, nlmsg_pid, NULL);
+}
EXPORT_SYMBOL(neigh_update);
/* Update the neigh to listen temporarily for probe responses, even if it is
@@ -1571,6 +1660,7 @@ void neigh_table_init(int index, struct neigh_table *tbl)
unsigned long phsize;
INIT_LIST_HEAD(&tbl->parms_list);
+ INIT_LIST_HEAD(&tbl->gc_list);
list_add(&tbl->parms.list, &tbl->parms_list);
write_pnet(&tbl->parms.net, &init_net);
refcount_set(&tbl->parms.refcnt, 1);
@@ -1662,6 +1752,19 @@ static struct neigh_table *neigh_find_table(int family)
return tbl;
}
+const struct nla_policy nda_policy[NDA_MAX+1] = {
+ [NDA_DST] = { .type = NLA_BINARY, .len = MAX_ADDR_LEN },
+ [NDA_LLADDR] = { .type = NLA_BINARY, .len = MAX_ADDR_LEN },
+ [NDA_CACHEINFO] = { .len = sizeof(struct nda_cacheinfo) },
+ [NDA_PROBES] = { .type = NLA_U32 },
+ [NDA_VLAN] = { .type = NLA_U16 },
+ [NDA_PORT] = { .type = NLA_U16 },
+ [NDA_VNI] = { .type = NLA_U32 },
+ [NDA_IFINDEX] = { .type = NLA_U32 },
+ [NDA_MASTER] = { .type = NLA_U32 },
+ [NDA_PROTOCOL] = { .type = NLA_U8 },
+};
+
static int neigh_delete(struct sk_buff *skb, struct nlmsghdr *nlh,
struct netlink_ext_ack *extack)
{
@@ -1678,8 +1781,10 @@ static int neigh_delete(struct sk_buff *skb, struct nlmsghdr *nlh,
goto out;
dst_attr = nlmsg_find_attr(nlh, sizeof(*ndm), NDA_DST);
- if (dst_attr == NULL)
+ if (!dst_attr) {
+ NL_SET_ERR_MSG(extack, "Network address not specified");
goto out;
+ }
ndm = nlmsg_data(nlh);
if (ndm->ndm_ifindex) {
@@ -1694,8 +1799,10 @@ static int neigh_delete(struct sk_buff *skb, struct nlmsghdr *nlh,
if (tbl == NULL)
return -EAFNOSUPPORT;
- if (nla_len(dst_attr) < (int)tbl->key_len)
+ if (nla_len(dst_attr) < (int)tbl->key_len) {
+ NL_SET_ERR_MSG(extack, "Invalid network address");
goto out;
+ }
if (ndm->ndm_flags & NTF_PROXY) {
err = pneigh_delete(tbl, net, nla_data(dst_attr), dev);
@@ -1711,10 +1818,9 @@ static int neigh_delete(struct sk_buff *skb, struct nlmsghdr *nlh,
goto out;
}
- err = neigh_update(neigh, NULL, NUD_FAILED,
- NEIGH_UPDATE_F_OVERRIDE |
- NEIGH_UPDATE_F_ADMIN,
- NETLINK_CB(skb).portid);
+ err = __neigh_update(neigh, NULL, NUD_FAILED,
+ NEIGH_UPDATE_F_OVERRIDE | NEIGH_UPDATE_F_ADMIN,
+ NETLINK_CB(skb).portid, extack);
write_lock_bh(&tbl->lock);
neigh_release(neigh);
neigh_remove_one(neigh, tbl);
@@ -1736,16 +1842,19 @@ static int neigh_add(struct sk_buff *skb, struct nlmsghdr *nlh,
struct net_device *dev = NULL;
struct neighbour *neigh;
void *dst, *lladdr;
+ u8 protocol = 0;
int err;
ASSERT_RTNL();
- err = nlmsg_parse(nlh, sizeof(*ndm), tb, NDA_MAX, NULL, extack);
+ err = nlmsg_parse(nlh, sizeof(*ndm), tb, NDA_MAX, nda_policy, extack);
if (err < 0)
goto out;
err = -EINVAL;
- if (tb[NDA_DST] == NULL)
+ if (!tb[NDA_DST]) {
+ NL_SET_ERR_MSG(extack, "Network address not specified");
goto out;
+ }
ndm = nlmsg_data(nlh);
if (ndm->ndm_ifindex) {
@@ -1755,19 +1864,27 @@ static int neigh_add(struct sk_buff *skb, struct nlmsghdr *nlh,
goto out;
}
- if (tb[NDA_LLADDR] && nla_len(tb[NDA_LLADDR]) < dev->addr_len)
+ if (tb[NDA_LLADDR] && nla_len(tb[NDA_LLADDR]) < dev->addr_len) {
+ NL_SET_ERR_MSG(extack, "Invalid link address");
goto out;
+ }
}
tbl = neigh_find_table(ndm->ndm_family);
if (tbl == NULL)
return -EAFNOSUPPORT;
- if (nla_len(tb[NDA_DST]) < (int)tbl->key_len)
+ if (nla_len(tb[NDA_DST]) < (int)tbl->key_len) {
+ NL_SET_ERR_MSG(extack, "Invalid network address");
goto out;
+ }
+
dst = nla_data(tb[NDA_DST]);
lladdr = tb[NDA_LLADDR] ? nla_data(tb[NDA_LLADDR]) : NULL;
+ if (tb[NDA_PROTOCOL])
+ protocol = nla_get_u8(tb[NDA_PROTOCOL]);
+
if (ndm->ndm_flags & NTF_PROXY) {
struct pneigh_entry *pn;
@@ -1775,22 +1892,30 @@ static int neigh_add(struct sk_buff *skb, struct nlmsghdr *nlh,
pn = pneigh_lookup(tbl, net, dst, dev, 1);
if (pn) {
pn->flags = ndm->ndm_flags;
+ if (protocol)
+ pn->protocol = protocol;
err = 0;
}
goto out;
}
- if (dev == NULL)
+ if (!dev) {
+ NL_SET_ERR_MSG(extack, "Device not specified");
goto out;
+ }
neigh = neigh_lookup(tbl, dst, dev);
if (neigh == NULL) {
+ bool exempt_from_gc;
+
if (!(nlh->nlmsg_flags & NLM_F_CREATE)) {
err = -ENOENT;
goto out;
}
- neigh = __neigh_lookup_errno(tbl, dst, dev);
+ exempt_from_gc = ndm->ndm_state & NUD_PERMANENT ||
+ ndm->ndm_flags & NTF_EXT_LEARNED;
+ neigh = ___neigh_create(tbl, dst, dev, exempt_from_gc, true);
if (IS_ERR(neigh)) {
err = PTR_ERR(neigh);
goto out;
@@ -1817,8 +1942,12 @@ static int neigh_add(struct sk_buff *skb, struct nlmsghdr *nlh,
neigh_event_send(neigh, NULL);
err = 0;
} else
- err = neigh_update(neigh, lladdr, ndm->ndm_state, flags,
- NETLINK_CB(skb).portid);
+ err = __neigh_update(neigh, lladdr, ndm->ndm_state, flags,
+ NETLINK_CB(skb).portid, extack);
+
+ if (protocol)
+ neigh->protocol = protocol;
+
neigh_release(neigh);
out:
@@ -2312,6 +2441,9 @@ static int neigh_fill_info(struct sk_buff *skb, struct neighbour *neigh,
nla_put(skb, NDA_CACHEINFO, sizeof(ci), &ci))
goto nla_put_failure;
+ if (neigh->protocol && nla_put_u8(skb, NDA_PROTOCOL, neigh->protocol))
+ goto nla_put_failure;
+
nlmsg_end(skb, nlh);
return 0;
@@ -2343,6 +2475,9 @@ static int pneigh_fill_info(struct sk_buff *skb, struct pneigh_entry *pn,
if (nla_put(skb, NDA_DST, tbl->key_len, pn->key))
goto nla_put_failure;
+ if (pn->protocol && nla_put_u8(skb, NDA_PROTOCOL, pn->protocol))
+ goto nla_put_failure;
+
nlmsg_end(skb, nlh);
return 0;
@@ -2505,10 +2640,10 @@ static int neigh_valid_dump_req(const struct nlmsghdr *nlh,
}
err = nlmsg_parse_strict(nlh, sizeof(struct ndmsg), tb, NDA_MAX,
- NULL, extack);
+ nda_policy, extack);
} else {
err = nlmsg_parse(nlh, sizeof(struct ndmsg), tb, NDA_MAX,
- NULL, extack);
+ nda_policy, extack);
}
if (err < 0)
return err;
@@ -2520,17 +2655,9 @@ static int neigh_valid_dump_req(const struct nlmsghdr *nlh,
/* all new attributes should require strict_check */
switch (i) {
case NDA_IFINDEX:
- if (nla_len(tb[i]) != sizeof(u32)) {
- NL_SET_ERR_MSG(extack, "Invalid IFINDEX attribute in neighbor dump request");
- return -EINVAL;
- }
filter->dev_idx = nla_get_u32(tb[i]);
break;
case NDA_MASTER:
- if (nla_len(tb[i]) != sizeof(u32)) {
- NL_SET_ERR_MSG(extack, "Invalid MASTER attribute in neighbor dump request");
- return -EINVAL;
- }
filter->master_idx = nla_get_u32(tb[i]);
break;
default:
@@ -2590,6 +2717,186 @@ static int neigh_dump_info(struct sk_buff *skb, struct netlink_callback *cb)
return skb->len;
}
+static int neigh_valid_get_req(const struct nlmsghdr *nlh,
+ struct neigh_table **tbl,
+ void **dst, int *dev_idx, u8 *ndm_flags,
+ struct netlink_ext_ack *extack)
+{
+ struct nlattr *tb[NDA_MAX + 1];
+ struct ndmsg *ndm;
+ int err, i;
+
+ if (nlh->nlmsg_len < nlmsg_msg_size(sizeof(*ndm))) {
+ NL_SET_ERR_MSG(extack, "Invalid header for neighbor get request");
+ return -EINVAL;
+ }
+
+ ndm = nlmsg_data(nlh);
+ if (ndm->ndm_pad1 || ndm->ndm_pad2 || ndm->ndm_state ||
+ ndm->ndm_type) {
+ NL_SET_ERR_MSG(extack, "Invalid values in header for neighbor get request");
+ return -EINVAL;
+ }
+
+ if (ndm->ndm_flags & ~NTF_PROXY) {
+ NL_SET_ERR_MSG(extack, "Invalid flags in header for neighbor get request");
+ return -EINVAL;
+ }
+
+ err = nlmsg_parse_strict(nlh, sizeof(struct ndmsg), tb, NDA_MAX,
+ nda_policy, extack);
+ if (err < 0)
+ return err;
+
+ *ndm_flags = ndm->ndm_flags;
+ *dev_idx = ndm->ndm_ifindex;
+ *tbl = neigh_find_table(ndm->ndm_family);
+ if (*tbl == NULL) {
+ NL_SET_ERR_MSG(extack, "Unsupported family in header for neighbor get request");
+ return -EAFNOSUPPORT;
+ }
+
+ for (i = 0; i <= NDA_MAX; ++i) {
+ if (!tb[i])
+ continue;
+
+ switch (i) {
+ case NDA_DST:
+ if (nla_len(tb[i]) != (int)(*tbl)->key_len) {
+ NL_SET_ERR_MSG(extack, "Invalid network address in neighbor get request");
+ return -EINVAL;
+ }
+ *dst = nla_data(tb[i]);
+ break;
+ default:
+ NL_SET_ERR_MSG(extack, "Unsupported attribute in neighbor get request");
+ return -EINVAL;
+ }
+ }
+
+ return 0;
+}
+
+static inline size_t neigh_nlmsg_size(void)
+{
+ return NLMSG_ALIGN(sizeof(struct ndmsg))
+ + nla_total_size(MAX_ADDR_LEN) /* NDA_DST */
+ + nla_total_size(MAX_ADDR_LEN) /* NDA_LLADDR */
+ + nla_total_size(sizeof(struct nda_cacheinfo))
+ + nla_total_size(4) /* NDA_PROBES */
+ + nla_total_size(1); /* NDA_PROTOCOL */
+}
+
+static int neigh_get_reply(struct net *net, struct neighbour *neigh,
+ u32 pid, u32 seq)
+{
+ struct sk_buff *skb;
+ int err = 0;
+
+ skb = nlmsg_new(neigh_nlmsg_size(), GFP_KERNEL);
+ if (!skb)
+ return -ENOBUFS;
+
+ err = neigh_fill_info(skb, neigh, pid, seq, RTM_NEWNEIGH, 0);
+ if (err) {
+ kfree_skb(skb);
+ goto errout;
+ }
+
+ err = rtnl_unicast(skb, net, pid);
+errout:
+ return err;
+}
+
+static inline size_t pneigh_nlmsg_size(void)
+{
+ return NLMSG_ALIGN(sizeof(struct ndmsg))
+ + nla_total_size(MAX_ADDR_LEN) /* NDA_DST */
+ + nla_total_size(1); /* NDA_PROTOCOL */
+}
+
+static int pneigh_get_reply(struct net *net, struct pneigh_entry *neigh,
+ u32 pid, u32 seq, struct neigh_table *tbl)
+{
+ struct sk_buff *skb;
+ int err = 0;
+
+ skb = nlmsg_new(pneigh_nlmsg_size(), GFP_KERNEL);
+ if (!skb)
+ return -ENOBUFS;
+
+ err = pneigh_fill_info(skb, neigh, pid, seq, RTM_NEWNEIGH, 0, tbl);
+ if (err) {
+ kfree_skb(skb);
+ goto errout;
+ }
+
+ err = rtnl_unicast(skb, net, pid);
+errout:
+ return err;
+}
+
+static int neigh_get(struct sk_buff *in_skb, struct nlmsghdr *nlh,
+ struct netlink_ext_ack *extack)
+{
+ struct net *net = sock_net(in_skb->sk);
+ struct net_device *dev = NULL;
+ struct neigh_table *tbl = NULL;
+ struct neighbour *neigh;
+ void *dst = NULL;
+ u8 ndm_flags = 0;
+ int dev_idx = 0;
+ int err;
+
+ err = neigh_valid_get_req(nlh, &tbl, &dst, &dev_idx, &ndm_flags,
+ extack);
+ if (err < 0)
+ return err;
+
+ if (dev_idx) {
+ dev = __dev_get_by_index(net, dev_idx);
+ if (!dev) {
+ NL_SET_ERR_MSG(extack, "Unknown device ifindex");
+ return -ENODEV;
+ }
+ }
+
+ if (!dst) {
+ NL_SET_ERR_MSG(extack, "Network address not specified");
+ return -EINVAL;
+ }
+
+ if (ndm_flags & NTF_PROXY) {
+ struct pneigh_entry *pn;
+
+ pn = pneigh_lookup(tbl, net, dst, dev, 0);
+ if (!pn) {
+ NL_SET_ERR_MSG(extack, "Proxy neighbour entry not found");
+ return -ENOENT;
+ }
+ return pneigh_get_reply(net, pn, NETLINK_CB(in_skb).portid,
+ nlh->nlmsg_seq, tbl);
+ }
+
+ if (!dev) {
+ NL_SET_ERR_MSG(extack, "No device specified");
+ return -EINVAL;
+ }
+
+ neigh = neigh_lookup(tbl, dst, dev);
+ if (!neigh) {
+ NL_SET_ERR_MSG(extack, "Neighbour entry not found");
+ return -ENOENT;
+ }
+
+ err = neigh_get_reply(net, neigh, NETLINK_CB(in_skb).portid,
+ nlh->nlmsg_seq);
+
+ neigh_release(neigh);
+
+ return err;
+}
+
void neigh_for_each(struct neigh_table *tbl, void (*cb)(struct neighbour *, void *), void *cookie)
{
int chain;
@@ -2636,7 +2943,7 @@ void __neigh_for_each_release(struct neigh_table *tbl,
rcu_assign_pointer(*np,
rcu_dereference_protected(n->next,
lockdep_is_held(&tbl->lock)));
- n->dead = 1;
+ neigh_mark_dead(n);
} else
np = &n->next;
write_unlock(&n->lock);
@@ -2997,15 +3304,6 @@ static const struct seq_operations neigh_stat_seq_ops = {
};
#endif /* CONFIG_PROC_FS */
-static inline size_t neigh_nlmsg_size(void)
-{
- return NLMSG_ALIGN(sizeof(struct ndmsg))
- + nla_total_size(MAX_ADDR_LEN) /* NDA_DST */
- + nla_total_size(MAX_ADDR_LEN) /* NDA_LLADDR */
- + nla_total_size(sizeof(struct nda_cacheinfo))
- + nla_total_size(4); /* NDA_PROBES */
-}
-
static void __neigh_notify(struct neighbour *n, int type, int flags,
u32 pid)
{
@@ -3389,7 +3687,7 @@ static int __init neigh_init(void)
{
rtnl_register(PF_UNSPEC, RTM_NEWNEIGH, neigh_add, NULL, 0);
rtnl_register(PF_UNSPEC, RTM_DELNEIGH, neigh_delete, NULL, 0);
- rtnl_register(PF_UNSPEC, RTM_GETNEIGH, NULL, neigh_dump_info, 0);
+ rtnl_register(PF_UNSPEC, RTM_GETNEIGH, neigh_get, neigh_dump_info, 0);
rtnl_register(PF_UNSPEC, RTM_GETNEIGHTBL, NULL, neightbl_dump_info,
0);
diff --git a/net/core/net-sysfs.c b/net/core/net-sysfs.c
index bd67c4d0fcfd..ff9fd2bb4ce4 100644
--- a/net/core/net-sysfs.c
+++ b/net/core/net-sysfs.c
@@ -337,7 +337,7 @@ NETDEVICE_SHOW_RW(mtu, fmt_dec);
static int change_flags(struct net_device *dev, unsigned long new_flags)
{
- return dev_change_flags(dev, (unsigned int)new_flags);
+ return dev_change_flags(dev, (unsigned int)new_flags, NULL);
}
static ssize_t flags_store(struct device *dev, struct device_attribute *attr,
diff --git a/net/core/net_namespace.c b/net/core/net_namespace.c
index fefe72774aeb..b02fb19df2cc 100644
--- a/net/core/net_namespace.c
+++ b/net/core/net_namespace.c
@@ -669,6 +669,7 @@ static const struct nla_policy rtnl_net_policy[NETNSA_MAX + 1] = {
[NETNSA_NSID] = { .type = NLA_S32 },
[NETNSA_PID] = { .type = NLA_U32 },
[NETNSA_FD] = { .type = NLA_U32 },
+ [NETNSA_TARGET_NSID] = { .type = NLA_S32 },
};
static int rtnl_net_newid(struct sk_buff *skb, struct nlmsghdr *nlh,
@@ -735,23 +736,38 @@ static int rtnl_net_get_size(void)
{
return NLMSG_ALIGN(sizeof(struct rtgenmsg))
+ nla_total_size(sizeof(s32)) /* NETNSA_NSID */
+ + nla_total_size(sizeof(s32)) /* NETNSA_CURRENT_NSID */
;
}
-static int rtnl_net_fill(struct sk_buff *skb, u32 portid, u32 seq, int flags,
- int cmd, struct net *net, int nsid)
+struct net_fill_args {
+ u32 portid;
+ u32 seq;
+ int flags;
+ int cmd;
+ int nsid;
+ bool add_ref;
+ int ref_nsid;
+};
+
+static int rtnl_net_fill(struct sk_buff *skb, struct net_fill_args *args)
{
struct nlmsghdr *nlh;
struct rtgenmsg *rth;
- nlh = nlmsg_put(skb, portid, seq, cmd, sizeof(*rth), flags);
+ nlh = nlmsg_put(skb, args->portid, args->seq, args->cmd, sizeof(*rth),
+ args->flags);
if (!nlh)
return -EMSGSIZE;
rth = nlmsg_data(nlh);
rth->rtgen_family = AF_UNSPEC;
- if (nla_put_s32(skb, NETNSA_NSID, nsid))
+ if (nla_put_s32(skb, NETNSA_NSID, args->nsid))
+ goto nla_put_failure;
+
+ if (args->add_ref &&
+ nla_put_s32(skb, NETNSA_CURRENT_NSID, args->ref_nsid))
goto nla_put_failure;
nlmsg_end(skb, nlh);
@@ -767,10 +783,15 @@ static int rtnl_net_getid(struct sk_buff *skb, struct nlmsghdr *nlh,
{
struct net *net = sock_net(skb->sk);
struct nlattr *tb[NETNSA_MAX + 1];
+ struct net_fill_args fillargs = {
+ .portid = NETLINK_CB(skb).portid,
+ .seq = nlh->nlmsg_seq,
+ .cmd = RTM_NEWNSID,
+ };
+ struct net *peer, *target = net;
struct nlattr *nla;
struct sk_buff *msg;
- struct net *peer;
- int err, id;
+ int err;
err = nlmsg_parse(nlh, sizeof(struct rtgenmsg), tb, NETNSA_MAX,
rtnl_net_policy, extack);
@@ -782,6 +803,11 @@ static int rtnl_net_getid(struct sk_buff *skb, struct nlmsghdr *nlh,
} else if (tb[NETNSA_FD]) {
peer = get_net_ns_by_fd(nla_get_u32(tb[NETNSA_FD]));
nla = tb[NETNSA_FD];
+ } else if (tb[NETNSA_NSID]) {
+ peer = get_net_ns_by_id(net, nla_get_u32(tb[NETNSA_NSID]));
+ if (!peer)
+ peer = ERR_PTR(-ENOENT);
+ nla = tb[NETNSA_NSID];
} else {
NL_SET_ERR_MSG(extack, "Peer netns reference is missing");
return -EINVAL;
@@ -793,15 +819,29 @@ static int rtnl_net_getid(struct sk_buff *skb, struct nlmsghdr *nlh,
return PTR_ERR(peer);
}
+ if (tb[NETNSA_TARGET_NSID]) {
+ int id = nla_get_s32(tb[NETNSA_TARGET_NSID]);
+
+ target = rtnl_get_net_ns_capable(NETLINK_CB(skb).sk, id);
+ if (IS_ERR(target)) {
+ NL_SET_BAD_ATTR(extack, tb[NETNSA_TARGET_NSID]);
+ NL_SET_ERR_MSG(extack,
+ "Target netns reference is invalid");
+ err = PTR_ERR(target);
+ goto out;
+ }
+ fillargs.add_ref = true;
+ fillargs.ref_nsid = peernet2id(net, peer);
+ }
+
msg = nlmsg_new(rtnl_net_get_size(), GFP_KERNEL);
if (!msg) {
err = -ENOMEM;
goto out;
}
- id = peernet2id(net, peer);
- err = rtnl_net_fill(msg, NETLINK_CB(skb).portid, nlh->nlmsg_seq, 0,
- RTM_NEWNSID, net, id);
+ fillargs.nsid = peernet2id(target, peer);
+ err = rtnl_net_fill(msg, &fillargs);
if (err < 0)
goto err_out;
@@ -811,14 +851,17 @@ static int rtnl_net_getid(struct sk_buff *skb, struct nlmsghdr *nlh,
err_out:
nlmsg_free(msg);
out:
+ if (fillargs.add_ref)
+ put_net(target);
put_net(peer);
return err;
}
struct rtnl_net_dump_cb {
- struct net *net;
+ struct net *tgt_net;
+ struct net *ref_net;
struct sk_buff *skb;
- struct netlink_callback *cb;
+ struct net_fill_args fillargs;
int idx;
int s_idx;
};
@@ -831,9 +874,10 @@ static int rtnl_net_dumpid_one(int id, void *peer, void *data)
if (net_cb->idx < net_cb->s_idx)
goto cont;
- ret = rtnl_net_fill(net_cb->skb, NETLINK_CB(net_cb->cb->skb).portid,
- net_cb->cb->nlh->nlmsg_seq, NLM_F_MULTI,
- RTM_NEWNSID, net_cb->net, id);
+ net_cb->fillargs.nsid = id;
+ if (net_cb->fillargs.add_ref)
+ net_cb->fillargs.ref_nsid = __peernet2id(net_cb->ref_net, peer);
+ ret = rtnl_net_fill(net_cb->skb, &net_cb->fillargs);
if (ret < 0)
return ret;
@@ -842,33 +886,96 @@ cont:
return 0;
}
+static int rtnl_valid_dump_net_req(const struct nlmsghdr *nlh, struct sock *sk,
+ struct rtnl_net_dump_cb *net_cb,
+ struct netlink_callback *cb)
+{
+ struct netlink_ext_ack *extack = cb->extack;
+ struct nlattr *tb[NETNSA_MAX + 1];
+ int err, i;
+
+ err = nlmsg_parse_strict(nlh, sizeof(struct rtgenmsg), tb, NETNSA_MAX,
+ rtnl_net_policy, extack);
+ if (err < 0)
+ return err;
+
+ for (i = 0; i <= NETNSA_MAX; i++) {
+ if (!tb[i])
+ continue;
+
+ if (i == NETNSA_TARGET_NSID) {
+ struct net *net;
+
+ net = rtnl_get_net_ns_capable(sk, nla_get_s32(tb[i]));
+ if (IS_ERR(net)) {
+ NL_SET_BAD_ATTR(extack, tb[i]);
+ NL_SET_ERR_MSG(extack,
+ "Invalid target network namespace id");
+ return PTR_ERR(net);
+ }
+ net_cb->fillargs.add_ref = true;
+ net_cb->ref_net = net_cb->tgt_net;
+ net_cb->tgt_net = net;
+ } else {
+ NL_SET_BAD_ATTR(extack, tb[i]);
+ NL_SET_ERR_MSG(extack,
+ "Unsupported attribute in dump request");
+ return -EINVAL;
+ }
+ }
+
+ return 0;
+}
+
static int rtnl_net_dumpid(struct sk_buff *skb, struct netlink_callback *cb)
{
- struct net *net = sock_net(skb->sk);
struct rtnl_net_dump_cb net_cb = {
- .net = net,
+ .tgt_net = sock_net(skb->sk),
.skb = skb,
- .cb = cb,
+ .fillargs = {
+ .portid = NETLINK_CB(cb->skb).portid,
+ .seq = cb->nlh->nlmsg_seq,
+ .flags = NLM_F_MULTI,
+ .cmd = RTM_NEWNSID,
+ },
.idx = 0,
.s_idx = cb->args[0],
};
+ int err = 0;
- if (cb->strict_check &&
- nlmsg_attrlen(cb->nlh, sizeof(struct rtgenmsg))) {
- NL_SET_ERR_MSG(cb->extack, "Unknown data in network namespace id dump request");
- return -EINVAL;
+ if (cb->strict_check) {
+ err = rtnl_valid_dump_net_req(cb->nlh, skb->sk, &net_cb, cb);
+ if (err < 0)
+ goto end;
}
- spin_lock_bh(&net->nsid_lock);
- idr_for_each(&net->netns_ids, rtnl_net_dumpid_one, &net_cb);
- spin_unlock_bh(&net->nsid_lock);
+ spin_lock_bh(&net_cb.tgt_net->nsid_lock);
+ if (net_cb.fillargs.add_ref &&
+ !net_eq(net_cb.ref_net, net_cb.tgt_net) &&
+ !spin_trylock_bh(&net_cb.ref_net->nsid_lock)) {
+ spin_unlock_bh(&net_cb.tgt_net->nsid_lock);
+ err = -EAGAIN;
+ goto end;
+ }
+ idr_for_each(&net_cb.tgt_net->netns_ids, rtnl_net_dumpid_one, &net_cb);
+ if (net_cb.fillargs.add_ref &&
+ !net_eq(net_cb.ref_net, net_cb.tgt_net))
+ spin_unlock_bh(&net_cb.ref_net->nsid_lock);
+ spin_unlock_bh(&net_cb.tgt_net->nsid_lock);
cb->args[0] = net_cb.idx;
- return skb->len;
+end:
+ if (net_cb.fillargs.add_ref)
+ put_net(net_cb.tgt_net);
+ return err < 0 ? err : skb->len;
}
static void rtnl_net_notifyid(struct net *net, int cmd, int id)
{
+ struct net_fill_args fillargs = {
+ .cmd = cmd,
+ .nsid = id,
+ };
struct sk_buff *msg;
int err = -ENOMEM;
@@ -876,7 +983,7 @@ static void rtnl_net_notifyid(struct net *net, int cmd, int id)
if (!msg)
goto out;
- err = rtnl_net_fill(msg, 0, 0, 0, cmd, net, id);
+ err = rtnl_net_fill(msg, &fillargs);
if (err < 0)
goto err_out;
@@ -917,7 +1024,8 @@ static int __init net_ns_init(void)
init_net_initialized = true;
up_write(&pernet_ops_rwsem);
- register_pernet_subsys(&net_ns_ops);
+ if (register_pernet_subsys(&net_ns_ops))
+ panic("Could not register network namespace subsystems");
rtnl_register(PF_UNSPEC, RTM_NEWNSID, rtnl_net_newid, NULL,
RTNL_FLAG_DOIT_UNLOCKED);
diff --git a/net/core/netpoll.c b/net/core/netpoll.c
index 464f0ff46c22..361aabffb8c0 100644
--- a/net/core/netpoll.c
+++ b/net/core/netpoll.c
@@ -663,7 +663,7 @@ int netpoll_setup(struct netpoll *np)
np_info(np, "device %s not up yet, forcing it\n", np->dev_name);
- err = dev_open(ndev);
+ err = dev_open(ndev, NULL);
if (err) {
np_err(np, "failed to open %s\n", ndev->name);
diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c
index 7819f7804eeb..48f61885fd6f 100644
--- a/net/core/rtnetlink.c
+++ b/net/core/rtnetlink.c
@@ -59,7 +59,7 @@
#include <net/rtnetlink.h>
#include <net/net_namespace.h>
-#define RTNL_MAX_TYPE 49
+#define RTNL_MAX_TYPE 50
#define RTNL_SLAVE_MAX_TYPE 36
struct rtnl_link {
@@ -2444,7 +2444,7 @@ static int do_setlink(const struct sk_buff *skb,
sa->sa_family = dev->type;
memcpy(sa->sa_data, nla_data(tb[IFLA_ADDRESS]),
dev->addr_len);
- err = dev_set_mac_address(dev, sa);
+ err = dev_set_mac_address(dev, sa, extack);
kfree(sa);
if (err)
goto errout;
@@ -2489,7 +2489,8 @@ static int do_setlink(const struct sk_buff *skb,
}
if (ifm->ifi_flags || ifm->ifi_change) {
- err = dev_change_flags(dev, rtnl_dev_combine_flags(dev, ifm));
+ err = dev_change_flags(dev, rtnl_dev_combine_flags(dev, ifm),
+ extack);
if (err < 0)
goto errout;
}
@@ -2870,7 +2871,8 @@ int rtnl_configure_link(struct net_device *dev, const struct ifinfomsg *ifm)
old_flags = dev->flags;
if (ifm && (ifm->ifi_flags || ifm->ifi_change)) {
- err = __dev_change_flags(dev, rtnl_dev_combine_flags(dev, ifm));
+ err = __dev_change_flags(dev, rtnl_dev_combine_flags(dev, ifm),
+ NULL);
if (err < 0)
return err;
}
@@ -2885,9 +2887,11 @@ int rtnl_configure_link(struct net_device *dev, const struct ifinfomsg *ifm)
}
EXPORT_SYMBOL(rtnl_configure_link);
-struct net_device *rtnl_create_link(struct net *net,
- const char *ifname, unsigned char name_assign_type,
- const struct rtnl_link_ops *ops, struct nlattr *tb[])
+struct net_device *rtnl_create_link(struct net *net, const char *ifname,
+ unsigned char name_assign_type,
+ const struct rtnl_link_ops *ops,
+ struct nlattr *tb[],
+ struct netlink_ext_ack *extack)
{
struct net_device *dev;
unsigned int num_tx_queues = 1;
@@ -2903,11 +2907,15 @@ struct net_device *rtnl_create_link(struct net *net,
else if (ops->get_num_rx_queues)
num_rx_queues = ops->get_num_rx_queues();
- if (num_tx_queues < 1 || num_tx_queues > 4096)
+ if (num_tx_queues < 1 || num_tx_queues > 4096) {
+ NL_SET_ERR_MSG(extack, "Invalid number of transmit queues");
return ERR_PTR(-EINVAL);
+ }
- if (num_rx_queues < 1 || num_rx_queues > 4096)
+ if (num_rx_queues < 1 || num_rx_queues > 4096) {
+ NL_SET_ERR_MSG(extack, "Invalid number of receive queues");
return ERR_PTR(-EINVAL);
+ }
dev = alloc_netdev_mqs(ops->priv_size, ifname, name_assign_type,
ops->setup, num_tx_queues, num_rx_queues);
@@ -2965,20 +2973,24 @@ static int rtnl_group_changelink(const struct sk_buff *skb,
return 0;
}
-static int rtnl_newlink(struct sk_buff *skb, struct nlmsghdr *nlh,
- struct netlink_ext_ack *extack)
+static int __rtnl_newlink(struct sk_buff *skb, struct nlmsghdr *nlh,
+ struct nlattr **attr, struct netlink_ext_ack *extack)
{
+ struct nlattr *slave_attr[RTNL_SLAVE_MAX_TYPE + 1];
+ unsigned char name_assign_type = NET_NAME_USER;
+ struct nlattr *linkinfo[IFLA_INFO_MAX + 1];
+ const struct rtnl_link_ops *m_ops = NULL;
+ struct net_device *master_dev = NULL;
struct net *net = sock_net(skb->sk);
const struct rtnl_link_ops *ops;
- const struct rtnl_link_ops *m_ops = NULL;
+ struct nlattr *tb[IFLA_MAX + 1];
+ struct net *dest_net, *link_net;
+ struct nlattr **slave_data;
+ char kind[MODULE_NAME_LEN];
struct net_device *dev;
- struct net_device *master_dev = NULL;
struct ifinfomsg *ifm;
- char kind[MODULE_NAME_LEN];
char ifname[IFNAMSIZ];
- struct nlattr *tb[IFLA_MAX+1];
- struct nlattr *linkinfo[IFLA_INFO_MAX+1];
- unsigned char name_assign_type = NET_NAME_USER;
+ struct nlattr **data;
int err;
#ifdef CONFIG_MODULES
@@ -3034,193 +3046,200 @@ replay:
ops = NULL;
}
- if (1) {
- struct nlattr *attr[RTNL_MAX_TYPE + 1];
- struct nlattr *slave_attr[RTNL_SLAVE_MAX_TYPE + 1];
- struct nlattr **data = NULL;
- struct nlattr **slave_data = NULL;
- struct net *dest_net, *link_net = NULL;
-
- if (ops) {
- if (ops->maxtype > RTNL_MAX_TYPE)
- return -EINVAL;
+ data = NULL;
+ if (ops) {
+ if (ops->maxtype > RTNL_MAX_TYPE)
+ return -EINVAL;
- if (ops->maxtype && linkinfo[IFLA_INFO_DATA]) {
- err = nla_parse_nested(attr, ops->maxtype,
- linkinfo[IFLA_INFO_DATA],
- ops->policy, NULL);
- if (err < 0)
- return err;
- data = attr;
- }
- if (ops->validate) {
- err = ops->validate(tb, data, extack);
- if (err < 0)
- return err;
- }
+ if (ops->maxtype && linkinfo[IFLA_INFO_DATA]) {
+ err = nla_parse_nested(attr, ops->maxtype,
+ linkinfo[IFLA_INFO_DATA],
+ ops->policy, extack);
+ if (err < 0)
+ return err;
+ data = attr;
+ }
+ if (ops->validate) {
+ err = ops->validate(tb, data, extack);
+ if (err < 0)
+ return err;
}
+ }
- if (m_ops) {
- if (m_ops->slave_maxtype > RTNL_SLAVE_MAX_TYPE)
- return -EINVAL;
+ slave_data = NULL;
+ if (m_ops) {
+ if (m_ops->slave_maxtype > RTNL_SLAVE_MAX_TYPE)
+ return -EINVAL;
- if (m_ops->slave_maxtype &&
- linkinfo[IFLA_INFO_SLAVE_DATA]) {
- err = nla_parse_nested(slave_attr,
- m_ops->slave_maxtype,
- linkinfo[IFLA_INFO_SLAVE_DATA],
- m_ops->slave_policy,
- NULL);
- if (err < 0)
- return err;
- slave_data = slave_attr;
- }
+ if (m_ops->slave_maxtype &&
+ linkinfo[IFLA_INFO_SLAVE_DATA]) {
+ err = nla_parse_nested(slave_attr, m_ops->slave_maxtype,
+ linkinfo[IFLA_INFO_SLAVE_DATA],
+ m_ops->slave_policy, extack);
+ if (err < 0)
+ return err;
+ slave_data = slave_attr;
}
+ }
- if (dev) {
- int status = 0;
-
- if (nlh->nlmsg_flags & NLM_F_EXCL)
- return -EEXIST;
- if (nlh->nlmsg_flags & NLM_F_REPLACE)
- return -EOPNOTSUPP;
+ if (dev) {
+ int status = 0;
- if (linkinfo[IFLA_INFO_DATA]) {
- if (!ops || ops != dev->rtnl_link_ops ||
- !ops->changelink)
- return -EOPNOTSUPP;
+ if (nlh->nlmsg_flags & NLM_F_EXCL)
+ return -EEXIST;
+ if (nlh->nlmsg_flags & NLM_F_REPLACE)
+ return -EOPNOTSUPP;
- err = ops->changelink(dev, tb, data, extack);
- if (err < 0)
- return err;
- status |= DO_SETLINK_NOTIFY;
- }
+ if (linkinfo[IFLA_INFO_DATA]) {
+ if (!ops || ops != dev->rtnl_link_ops ||
+ !ops->changelink)
+ return -EOPNOTSUPP;
- if (linkinfo[IFLA_INFO_SLAVE_DATA]) {
- if (!m_ops || !m_ops->slave_changelink)
- return -EOPNOTSUPP;
+ err = ops->changelink(dev, tb, data, extack);
+ if (err < 0)
+ return err;
+ status |= DO_SETLINK_NOTIFY;
+ }
- err = m_ops->slave_changelink(master_dev, dev,
- tb, slave_data,
- extack);
- if (err < 0)
- return err;
- status |= DO_SETLINK_NOTIFY;
- }
+ if (linkinfo[IFLA_INFO_SLAVE_DATA]) {
+ if (!m_ops || !m_ops->slave_changelink)
+ return -EOPNOTSUPP;
- return do_setlink(skb, dev, ifm, extack, tb, ifname,
- status);
+ err = m_ops->slave_changelink(master_dev, dev, tb,
+ slave_data, extack);
+ if (err < 0)
+ return err;
+ status |= DO_SETLINK_NOTIFY;
}
- if (!(nlh->nlmsg_flags & NLM_F_CREATE)) {
- if (ifm->ifi_index == 0 && tb[IFLA_GROUP])
- return rtnl_group_changelink(skb, net,
+ return do_setlink(skb, dev, ifm, extack, tb, ifname, status);
+ }
+
+ if (!(nlh->nlmsg_flags & NLM_F_CREATE)) {
+ if (ifm->ifi_index == 0 && tb[IFLA_GROUP])
+ return rtnl_group_changelink(skb, net,
nla_get_u32(tb[IFLA_GROUP]),
ifm, extack, tb);
- return -ENODEV;
- }
+ return -ENODEV;
+ }
- if (tb[IFLA_MAP] || tb[IFLA_PROTINFO])
- return -EOPNOTSUPP;
+ if (tb[IFLA_MAP] || tb[IFLA_PROTINFO])
+ return -EOPNOTSUPP;
- if (!ops) {
+ if (!ops) {
#ifdef CONFIG_MODULES
- if (kind[0]) {
- __rtnl_unlock();
- request_module("rtnl-link-%s", kind);
- rtnl_lock();
- ops = rtnl_link_ops_get(kind);
- if (ops)
- goto replay;
- }
-#endif
- return -EOPNOTSUPP;
+ if (kind[0]) {
+ __rtnl_unlock();
+ request_module("rtnl-link-%s", kind);
+ rtnl_lock();
+ ops = rtnl_link_ops_get(kind);
+ if (ops)
+ goto replay;
}
+#endif
+ NL_SET_ERR_MSG(extack, "Unknown device type");
+ return -EOPNOTSUPP;
+ }
- if (!ops->setup)
- return -EOPNOTSUPP;
-
- if (!ifname[0]) {
- snprintf(ifname, IFNAMSIZ, "%s%%d", ops->kind);
- name_assign_type = NET_NAME_ENUM;
- }
+ if (!ops->setup)
+ return -EOPNOTSUPP;
- dest_net = rtnl_link_get_net_capable(skb, net, tb, CAP_NET_ADMIN);
- if (IS_ERR(dest_net))
- return PTR_ERR(dest_net);
+ if (!ifname[0]) {
+ snprintf(ifname, IFNAMSIZ, "%s%%d", ops->kind);
+ name_assign_type = NET_NAME_ENUM;
+ }
- if (tb[IFLA_LINK_NETNSID]) {
- int id = nla_get_s32(tb[IFLA_LINK_NETNSID]);
+ dest_net = rtnl_link_get_net_capable(skb, net, tb, CAP_NET_ADMIN);
+ if (IS_ERR(dest_net))
+ return PTR_ERR(dest_net);
- link_net = get_net_ns_by_id(dest_net, id);
- if (!link_net) {
- err = -EINVAL;
- goto out;
- }
- err = -EPERM;
- if (!netlink_ns_capable(skb, link_net->user_ns, CAP_NET_ADMIN))
- goto out;
- }
+ if (tb[IFLA_LINK_NETNSID]) {
+ int id = nla_get_s32(tb[IFLA_LINK_NETNSID]);
- dev = rtnl_create_link(link_net ? : dest_net, ifname,
- name_assign_type, ops, tb);
- if (IS_ERR(dev)) {
- err = PTR_ERR(dev);
+ link_net = get_net_ns_by_id(dest_net, id);
+ if (!link_net) {
+ NL_SET_ERR_MSG(extack, "Unknown network namespace id");
+ err = -EINVAL;
goto out;
}
+ err = -EPERM;
+ if (!netlink_ns_capable(skb, link_net->user_ns, CAP_NET_ADMIN))
+ goto out;
+ } else {
+ link_net = NULL;
+ }
- dev->ifindex = ifm->ifi_index;
+ dev = rtnl_create_link(link_net ? : dest_net, ifname,
+ name_assign_type, ops, tb, extack);
+ if (IS_ERR(dev)) {
+ err = PTR_ERR(dev);
+ goto out;
+ }
- if (ops->newlink) {
- err = ops->newlink(link_net ? : net, dev, tb, data,
- extack);
- /* Drivers should call free_netdev() in ->destructor
- * and unregister it on failure after registration
- * so that device could be finally freed in rtnl_unlock.
- */
- if (err < 0) {
- /* If device is not registered at all, free it now */
- if (dev->reg_state == NETREG_UNINITIALIZED)
- free_netdev(dev);
- goto out;
- }
- } else {
- err = register_netdevice(dev);
- if (err < 0) {
+ dev->ifindex = ifm->ifi_index;
+
+ if (ops->newlink) {
+ err = ops->newlink(link_net ? : net, dev, tb, data, extack);
+ /* Drivers should call free_netdev() in ->destructor
+ * and unregister it on failure after registration
+ * so that device could be finally freed in rtnl_unlock.
+ */
+ if (err < 0) {
+ /* If device is not registered at all, free it now */
+ if (dev->reg_state == NETREG_UNINITIALIZED)
free_netdev(dev);
- goto out;
- }
+ goto out;
}
- err = rtnl_configure_link(dev, ifm);
+ } else {
+ err = register_netdevice(dev);
+ if (err < 0) {
+ free_netdev(dev);
+ goto out;
+ }
+ }
+ err = rtnl_configure_link(dev, ifm);
+ if (err < 0)
+ goto out_unregister;
+ if (link_net) {
+ err = dev_change_net_namespace(dev, dest_net, ifname);
if (err < 0)
goto out_unregister;
- if (link_net) {
- err = dev_change_net_namespace(dev, dest_net, ifname);
- if (err < 0)
- goto out_unregister;
- }
- if (tb[IFLA_MASTER]) {
- err = do_set_master(dev, nla_get_u32(tb[IFLA_MASTER]),
- extack);
- if (err)
- goto out_unregister;
- }
+ }
+ if (tb[IFLA_MASTER]) {
+ err = do_set_master(dev, nla_get_u32(tb[IFLA_MASTER]), extack);
+ if (err)
+ goto out_unregister;
+ }
out:
- if (link_net)
- put_net(link_net);
- put_net(dest_net);
- return err;
+ if (link_net)
+ put_net(link_net);
+ put_net(dest_net);
+ return err;
out_unregister:
- if (ops->newlink) {
- LIST_HEAD(list_kill);
+ if (ops->newlink) {
+ LIST_HEAD(list_kill);
- ops->dellink(dev, &list_kill);
- unregister_netdevice_many(&list_kill);
- } else {
- unregister_netdevice(dev);
- }
- goto out;
+ ops->dellink(dev, &list_kill);
+ unregister_netdevice_many(&list_kill);
+ } else {
+ unregister_netdevice(dev);
}
+ goto out;
+}
+
+static int rtnl_newlink(struct sk_buff *skb, struct nlmsghdr *nlh,
+ struct netlink_ext_ack *extack)
+{
+ struct nlattr **attr;
+ int ret;
+
+ attr = kmalloc_array(RTNL_MAX_TYPE + 1, sizeof(*attr), GFP_KERNEL);
+ if (!attr)
+ return -ENOMEM;
+
+ ret = __rtnl_newlink(skb, nlh, attr, extack);
+ kfree(attr);
+ return ret;
}
static int rtnl_getlink(struct sk_buff *skb, struct nlmsghdr *nlh,
@@ -4002,6 +4021,160 @@ out:
return skb->len;
}
+static int valid_fdb_get_strict(const struct nlmsghdr *nlh,
+ struct nlattr **tb, u8 *ndm_flags,
+ int *br_idx, int *brport_idx, u8 **addr,
+ u16 *vid, struct netlink_ext_ack *extack)
+{
+ struct ndmsg *ndm;
+ int err, i;
+
+ if (nlh->nlmsg_len < nlmsg_msg_size(sizeof(*ndm))) {
+ NL_SET_ERR_MSG(extack, "Invalid header for fdb get request");
+ return -EINVAL;
+ }
+
+ ndm = nlmsg_data(nlh);
+ if (ndm->ndm_pad1 || ndm->ndm_pad2 || ndm->ndm_state ||
+ ndm->ndm_type) {
+ NL_SET_ERR_MSG(extack, "Invalid values in header for fdb get request");
+ return -EINVAL;
+ }
+
+ if (ndm->ndm_flags & ~(NTF_MASTER | NTF_SELF)) {
+ NL_SET_ERR_MSG(extack, "Invalid flags in header for fdb get request");
+ return -EINVAL;
+ }
+
+ err = nlmsg_parse_strict(nlh, sizeof(struct ndmsg), tb, NDA_MAX,
+ nda_policy, extack);
+ if (err < 0)
+ return err;
+
+ *ndm_flags = ndm->ndm_flags;
+ *brport_idx = ndm->ndm_ifindex;
+ for (i = 0; i <= NDA_MAX; ++i) {
+ if (!tb[i])
+ continue;
+
+ switch (i) {
+ case NDA_MASTER:
+ *br_idx = nla_get_u32(tb[i]);
+ break;
+ case NDA_LLADDR:
+ if (nla_len(tb[i]) != ETH_ALEN) {
+ NL_SET_ERR_MSG(extack, "Invalid address in fdb get request");
+ return -EINVAL;
+ }
+ *addr = nla_data(tb[i]);
+ break;
+ case NDA_VLAN:
+ err = fdb_vid_parse(tb[i], vid, extack);
+ if (err)
+ return err;
+ break;
+ case NDA_VNI:
+ break;
+ default:
+ NL_SET_ERR_MSG(extack, "Unsupported attribute in fdb get request");
+ return -EINVAL;
+ }
+ }
+
+ return 0;
+}
+
+static int rtnl_fdb_get(struct sk_buff *in_skb, struct nlmsghdr *nlh,
+ struct netlink_ext_ack *extack)
+{
+ struct net_device *dev = NULL, *br_dev = NULL;
+ const struct net_device_ops *ops = NULL;
+ struct net *net = sock_net(in_skb->sk);
+ struct nlattr *tb[NDA_MAX + 1];
+ struct sk_buff *skb;
+ int brport_idx = 0;
+ u8 ndm_flags = 0;
+ int br_idx = 0;
+ u8 *addr = NULL;
+ u16 vid = 0;
+ int err;
+
+ err = valid_fdb_get_strict(nlh, tb, &ndm_flags, &br_idx,
+ &brport_idx, &addr, &vid, extack);
+ if (err < 0)
+ return err;
+
+ if (brport_idx) {
+ dev = __dev_get_by_index(net, brport_idx);
+ if (!dev) {
+ NL_SET_ERR_MSG(extack, "Unknown device ifindex");
+ return -ENODEV;
+ }
+ }
+
+ if (br_idx) {
+ if (dev) {
+ NL_SET_ERR_MSG(extack, "Master and device are mutually exclusive");
+ return -EINVAL;
+ }
+
+ br_dev = __dev_get_by_index(net, br_idx);
+ if (!br_dev) {
+ NL_SET_ERR_MSG(extack, "Invalid master ifindex");
+ return -EINVAL;
+ }
+ ops = br_dev->netdev_ops;
+ }
+
+ if (dev) {
+ if (!ndm_flags || (ndm_flags & NTF_MASTER)) {
+ if (!(dev->priv_flags & IFF_BRIDGE_PORT)) {
+ NL_SET_ERR_MSG(extack, "Device is not a bridge port");
+ return -EINVAL;
+ }
+ br_dev = netdev_master_upper_dev_get(dev);
+ if (!br_dev) {
+ NL_SET_ERR_MSG(extack, "Master of device not found");
+ return -EINVAL;
+ }
+ ops = br_dev->netdev_ops;
+ } else {
+ if (!(ndm_flags & NTF_SELF)) {
+ NL_SET_ERR_MSG(extack, "Missing NTF_SELF");
+ return -EINVAL;
+ }
+ ops = dev->netdev_ops;
+ }
+ }
+
+ if (!br_dev && !dev) {
+ NL_SET_ERR_MSG(extack, "No device specified");
+ return -ENODEV;
+ }
+
+ if (!ops || !ops->ndo_fdb_get) {
+ NL_SET_ERR_MSG(extack, "Fdb get operation not supported by device");
+ return -EOPNOTSUPP;
+ }
+
+ skb = nlmsg_new(NLMSG_GOODSIZE, GFP_KERNEL);
+ if (!skb)
+ return -ENOBUFS;
+
+ if (br_dev)
+ dev = br_dev;
+ err = ops->ndo_fdb_get(skb, tb, dev, addr, vid,
+ NETLINK_CB(in_skb).portid,
+ nlh->nlmsg_seq, extack);
+ if (err)
+ goto out;
+
+ return rtnl_unicast(skb, net, NETLINK_CB(in_skb).portid);
+out:
+ kfree_skb(skb);
+ return err;
+}
+
static int brport_nla_put_flag(struct sk_buff *skb, u32 flags, u32 mask,
unsigned int attrnum, unsigned int flag)
{
@@ -4313,7 +4486,8 @@ static int rtnl_bridge_setlink(struct sk_buff *skb, struct nlmsghdr *nlh,
goto out;
}
- err = br_dev->netdev_ops->ndo_bridge_setlink(dev, nlh, flags);
+ err = br_dev->netdev_ops->ndo_bridge_setlink(dev, nlh, flags,
+ extack);
if (err)
goto out;
@@ -4325,7 +4499,8 @@ static int rtnl_bridge_setlink(struct sk_buff *skb, struct nlmsghdr *nlh,
err = -EOPNOTSUPP;
else
err = dev->netdev_ops->ndo_bridge_setlink(dev, nlh,
- flags);
+ flags,
+ extack);
if (!err) {
flags &= ~BRIDGE_FLAGS_SELF;
@@ -5060,7 +5235,7 @@ void __init rtnetlink_init(void)
rtnl_register(PF_BRIDGE, RTM_NEWNEIGH, rtnl_fdb_add, NULL, 0);
rtnl_register(PF_BRIDGE, RTM_DELNEIGH, rtnl_fdb_del, NULL, 0);
- rtnl_register(PF_BRIDGE, RTM_GETNEIGH, NULL, rtnl_fdb_dump, 0);
+ rtnl_register(PF_BRIDGE, RTM_GETNEIGH, rtnl_fdb_get, rtnl_fdb_dump, 0);
rtnl_register(PF_BRIDGE, RTM_GETLINK, NULL, rtnl_bridge_getlink, 0);
rtnl_register(PF_BRIDGE, RTM_DELLINK, rtnl_bridge_dellink, NULL, 0);
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index a8217e221e19..37317ffec146 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -79,6 +79,9 @@
struct kmem_cache *skbuff_head_cache __ro_after_init;
static struct kmem_cache *skbuff_fclone_cache __ro_after_init;
+#ifdef CONFIG_SKB_EXTENSIONS
+static struct kmem_cache *skbuff_ext_cache __ro_after_init;
+#endif
int sysctl_max_skb_frags __read_mostly = MAX_SKB_FRAGS;
EXPORT_SYMBOL(sysctl_max_skb_frags);
@@ -606,7 +609,6 @@ fastpath:
void skb_release_head_state(struct sk_buff *skb)
{
skb_dst_drop(skb);
- secpath_reset(skb);
if (skb->destructor) {
WARN_ON(in_irq());
skb->destructor(skb);
@@ -614,9 +616,7 @@ void skb_release_head_state(struct sk_buff *skb)
#if IS_ENABLED(CONFIG_NF_CONNTRACK)
nf_conntrack_put(skb_nfct(skb));
#endif
-#if IS_ENABLED(CONFIG_BRIDGE_NETFILTER)
- nf_bridge_put(skb->nf_bridge);
-#endif
+ skb_ext_put(skb);
}
/* Free everything but the sk_buff shell. */
@@ -796,9 +796,7 @@ static void __copy_skb_header(struct sk_buff *new, const struct sk_buff *old)
new->dev = old->dev;
memcpy(new->cb, old->cb, sizeof(old->cb));
skb_dst_copy(new, old);
-#ifdef CONFIG_XFRM
- new->sp = secpath_get(old->sp);
-#endif
+ __skb_ext_copy(new, old);
__nf_copy(new, old, false);
/* Note : this field could be in headers_start/headers_end section
@@ -1089,7 +1087,7 @@ void sock_zerocopy_put(struct ubuf_info *uarg)
}
EXPORT_SYMBOL_GPL(sock_zerocopy_put);
-void sock_zerocopy_put_abort(struct ubuf_info *uarg)
+void sock_zerocopy_put_abort(struct ubuf_info *uarg, bool have_uref)
{
if (uarg) {
struct sock *sk = skb_from_uarg(uarg)->sk;
@@ -1097,7 +1095,8 @@ void sock_zerocopy_put_abort(struct ubuf_info *uarg)
atomic_dec(&sk->sk_zckey);
uarg->len--;
- sock_zerocopy_put(uarg);
+ if (have_uref)
+ sock_zerocopy_put(uarg);
}
}
EXPORT_SYMBOL_GPL(sock_zerocopy_put_abort);
@@ -1105,6 +1104,12 @@ EXPORT_SYMBOL_GPL(sock_zerocopy_put_abort);
extern int __zerocopy_sg_from_iter(struct sock *sk, struct sk_buff *skb,
struct iov_iter *from, size_t length);
+int skb_zerocopy_iter_dgram(struct sk_buff *skb, struct msghdr *msg, int len)
+{
+ return __zerocopy_sg_from_iter(skb->sk, skb, &msg->msg_iter, len);
+}
+EXPORT_SYMBOL_GPL(skb_zerocopy_iter_dgram);
+
int skb_zerocopy_iter_stream(struct sock *sk, struct sk_buff *skb,
struct msghdr *msg, int len,
struct ubuf_info *uarg)
@@ -1131,7 +1136,7 @@ int skb_zerocopy_iter_stream(struct sock *sk, struct sk_buff *skb,
return err;
}
- skb_zcopy_set(skb, uarg);
+ skb_zcopy_set(skb, uarg, NULL);
return skb->len - orig_len;
}
EXPORT_SYMBOL_GPL(skb_zerocopy_iter_stream);
@@ -1151,7 +1156,7 @@ static int skb_zerocopy_clone(struct sk_buff *nskb, struct sk_buff *orig,
if (skb_copy_ubufs(nskb, GFP_ATOMIC))
return -EIO;
}
- skb_zcopy_set(nskb, skb_uarg(orig));
+ skb_zcopy_set(nskb, skb_uarg(orig), NULL);
}
return 0;
}
@@ -1925,8 +1930,6 @@ void *__pskb_pull_tail(struct sk_buff *skb, int delta)
struct sk_buff *insp = NULL;
do {
- BUG_ON(!list);
-
if (list->len <= eat) {
/* Eaten as whole. */
eat -= list->len;
@@ -2366,19 +2369,6 @@ error:
}
EXPORT_SYMBOL_GPL(skb_send_sock_locked);
-/* Send skb data on a socket. */
-int skb_send_sock(struct sock *sk, struct sk_buff *skb, int offset, int len)
-{
- int ret = 0;
-
- lock_sock(sk);
- ret = skb_send_sock_locked(sk, skb, offset, len);
- release_sock(sk);
-
- return ret;
-}
-EXPORT_SYMBOL_GPL(skb_send_sock);
-
/**
* skb_store_bits - store bits from kernel buffer to skb
* @skb: destination buffer
@@ -2645,6 +2635,65 @@ __wsum skb_copy_and_csum_bits(const struct sk_buff *skb, int offset,
}
EXPORT_SYMBOL(skb_copy_and_csum_bits);
+__sum16 __skb_checksum_complete_head(struct sk_buff *skb, int len)
+{
+ __sum16 sum;
+
+ sum = csum_fold(skb_checksum(skb, 0, len, skb->csum));
+ /* See comments in __skb_checksum_complete(). */
+ if (likely(!sum)) {
+ if (unlikely(skb->ip_summed == CHECKSUM_COMPLETE) &&
+ !skb->csum_complete_sw)
+ netdev_rx_csum_fault(skb->dev, skb);
+ }
+ if (!skb_shared(skb))
+ skb->csum_valid = !sum;
+ return sum;
+}
+EXPORT_SYMBOL(__skb_checksum_complete_head);
+
+/* This function assumes skb->csum already holds pseudo header's checksum,
+ * which has been changed from the hardware checksum, for example, by
+ * __skb_checksum_validate_complete(). And, the original skb->csum must
+ * have been validated unsuccessfully for CHECKSUM_COMPLETE case.
+ *
+ * It returns non-zero if the recomputed checksum is still invalid, otherwise
+ * zero. The new checksum is stored back into skb->csum unless the skb is
+ * shared.
+ */
+__sum16 __skb_checksum_complete(struct sk_buff *skb)
+{
+ __wsum csum;
+ __sum16 sum;
+
+ csum = skb_checksum(skb, 0, skb->len, 0);
+
+ sum = csum_fold(csum_add(skb->csum, csum));
+ /* This check is inverted, because we already knew the hardware
+ * checksum is invalid before calling this function. So, if the
+ * re-computed checksum is valid instead, then we have a mismatch
+ * between the original skb->csum and skb_checksum(). This means either
+ * the original hardware checksum is incorrect or we screw up skb->csum
+ * when moving skb->data around.
+ */
+ if (likely(!sum)) {
+ if (unlikely(skb->ip_summed == CHECKSUM_COMPLETE) &&
+ !skb->csum_complete_sw)
+ netdev_rx_csum_fault(skb->dev, skb);
+ }
+
+ if (!skb_shared(skb)) {
+ /* Save full packet checksum */
+ skb->csum = csum;
+ skb->ip_summed = CHECKSUM_COMPLETE;
+ skb->csum_complete_sw = 1;
+ skb->csum_valid = !sum;
+ }
+
+ return sum;
+}
+EXPORT_SYMBOL(__skb_checksum_complete);
+
static __wsum warn_crc32c_csum_update(const void *buff, int len, __wsum sum)
{
net_warn_ratelimited(
@@ -2962,28 +3011,6 @@ void skb_append(struct sk_buff *old, struct sk_buff *newsk, struct sk_buff_head
}
EXPORT_SYMBOL(skb_append);
-/**
- * skb_insert - insert a buffer
- * @old: buffer to insert before
- * @newsk: buffer to insert
- * @list: list to use
- *
- * Place a packet before a given packet in a list. The list locks are
- * taken and this function is atomic with respect to other list locked
- * calls.
- *
- * A buffer cannot be placed on two lists at the same time.
- */
-void skb_insert(struct sk_buff *old, struct sk_buff *newsk, struct sk_buff_head *list)
-{
- unsigned long flags;
-
- spin_lock_irqsave(&list->lock, flags);
- __skb_insert(newsk, old->prev, old, list);
- spin_unlock_irqrestore(&list->lock, flags);
-}
-EXPORT_SYMBOL(skb_insert);
-
static inline void skb_split_inside_header(struct sk_buff *skb,
struct sk_buff* skb1,
const u32 len, const int pos)
@@ -3873,6 +3900,46 @@ done:
}
EXPORT_SYMBOL_GPL(skb_gro_receive);
+#ifdef CONFIG_SKB_EXTENSIONS
+#define SKB_EXT_ALIGN_VALUE 8
+#define SKB_EXT_CHUNKSIZEOF(x) (ALIGN((sizeof(x)), SKB_EXT_ALIGN_VALUE) / SKB_EXT_ALIGN_VALUE)
+
+static const u8 skb_ext_type_len[] = {
+#if IS_ENABLED(CONFIG_BRIDGE_NETFILTER)
+ [SKB_EXT_BRIDGE_NF] = SKB_EXT_CHUNKSIZEOF(struct nf_bridge_info),
+#endif
+#ifdef CONFIG_XFRM
+ [SKB_EXT_SEC_PATH] = SKB_EXT_CHUNKSIZEOF(struct sec_path),
+#endif
+};
+
+static __always_inline unsigned int skb_ext_total_length(void)
+{
+ return SKB_EXT_CHUNKSIZEOF(struct skb_ext) +
+#if IS_ENABLED(CONFIG_BRIDGE_NETFILTER)
+ skb_ext_type_len[SKB_EXT_BRIDGE_NF] +
+#endif
+#ifdef CONFIG_XFRM
+ skb_ext_type_len[SKB_EXT_SEC_PATH] +
+#endif
+ 0;
+}
+
+static void skb_extensions_init(void)
+{
+ BUILD_BUG_ON(SKB_EXT_NUM >= 8);
+ BUILD_BUG_ON(skb_ext_total_length() > 255);
+
+ skbuff_ext_cache = kmem_cache_create("skbuff_ext_cache",
+ SKB_EXT_ALIGN_VALUE * skb_ext_total_length(),
+ 0,
+ SLAB_HWCACHE_ALIGN|SLAB_PANIC,
+ NULL);
+}
+#else
+static void skb_extensions_init(void) {}
+#endif
+
void __init skb_init(void)
{
skbuff_head_cache = kmem_cache_create_usercopy("skbuff_head_cache",
@@ -3887,6 +3954,7 @@ void __init skb_init(void)
0,
SLAB_HWCACHE_ALIGN|SLAB_PANIC,
NULL);
+ skb_extensions_init();
}
static int
@@ -4856,7 +4924,7 @@ void skb_scrub_packet(struct sk_buff *skb, bool xnet)
#ifdef CONFIG_NET_SWITCHDEV
skb->offload_fwd_mark = 0;
- skb->offload_mr_fwd_mark = 0;
+ skb->offload_l3_fwd_mark = 0;
#endif
if (!xnet)
@@ -5128,7 +5196,7 @@ int skb_vlan_pop(struct sk_buff *skb)
int err;
if (likely(skb_vlan_tag_present(skb))) {
- skb->vlan_tci = 0;
+ __vlan_hwaccel_clear_tag(skb);
} else {
if (unlikely(!eth_type_vlan(skb->protocol)))
return 0;
@@ -5525,3 +5593,148 @@ void skb_condense(struct sk_buff *skb)
*/
skb->truesize = SKB_TRUESIZE(skb_end_offset(skb));
}
+
+#ifdef CONFIG_SKB_EXTENSIONS
+static void *skb_ext_get_ptr(struct skb_ext *ext, enum skb_ext_id id)
+{
+ return (void *)ext + (ext->offset[id] * SKB_EXT_ALIGN_VALUE);
+}
+
+static struct skb_ext *skb_ext_alloc(void)
+{
+ struct skb_ext *new = kmem_cache_alloc(skbuff_ext_cache, GFP_ATOMIC);
+
+ if (new) {
+ memset(new->offset, 0, sizeof(new->offset));
+ refcount_set(&new->refcnt, 1);
+ }
+
+ return new;
+}
+
+static struct skb_ext *skb_ext_maybe_cow(struct skb_ext *old,
+ unsigned int old_active)
+{
+ struct skb_ext *new;
+
+ if (refcount_read(&old->refcnt) == 1)
+ return old;
+
+ new = kmem_cache_alloc(skbuff_ext_cache, GFP_ATOMIC);
+ if (!new)
+ return NULL;
+
+ memcpy(new, old, old->chunks * SKB_EXT_ALIGN_VALUE);
+ refcount_set(&new->refcnt, 1);
+
+#ifdef CONFIG_XFRM
+ if (old_active & (1 << SKB_EXT_SEC_PATH)) {
+ struct sec_path *sp = skb_ext_get_ptr(old, SKB_EXT_SEC_PATH);
+ unsigned int i;
+
+ for (i = 0; i < sp->len; i++)
+ xfrm_state_hold(sp->xvec[i]);
+ }
+#endif
+ __skb_ext_put(old);
+ return new;
+}
+
+/**
+ * skb_ext_add - allocate space for given extension, COW if needed
+ * @skb: buffer
+ * @id: extension to allocate space for
+ *
+ * Allocates enough space for the given extension.
+ * If the extension is already present, a pointer to that extension
+ * is returned.
+ *
+ * If the skb was cloned, COW applies and the returned memory can be
+ * modified without changing the extension space of clones buffers.
+ *
+ * Returns pointer to the extension or NULL on allocation failure.
+ */
+void *skb_ext_add(struct sk_buff *skb, enum skb_ext_id id)
+{
+ struct skb_ext *new, *old = NULL;
+ unsigned int newlen, newoff;
+
+ if (skb->active_extensions) {
+ old = skb->extensions;
+
+ new = skb_ext_maybe_cow(old, skb->active_extensions);
+ if (!new)
+ return NULL;
+
+ if (__skb_ext_exist(new, id))
+ goto set_active;
+
+ newoff = new->chunks;
+ } else {
+ newoff = SKB_EXT_CHUNKSIZEOF(*new);
+
+ new = skb_ext_alloc();
+ if (!new)
+ return NULL;
+ }
+
+ newlen = newoff + skb_ext_type_len[id];
+ new->chunks = newlen;
+ new->offset[id] = newoff;
+set_active:
+ skb->extensions = new;
+ skb->active_extensions |= 1 << id;
+ return skb_ext_get_ptr(new, id);
+}
+EXPORT_SYMBOL(skb_ext_add);
+
+#ifdef CONFIG_XFRM
+static void skb_ext_put_sp(struct sec_path *sp)
+{
+ unsigned int i;
+
+ for (i = 0; i < sp->len; i++)
+ xfrm_state_put(sp->xvec[i]);
+}
+#endif
+
+void __skb_ext_del(struct sk_buff *skb, enum skb_ext_id id)
+{
+ struct skb_ext *ext = skb->extensions;
+
+ skb->active_extensions &= ~(1 << id);
+ if (skb->active_extensions == 0) {
+ skb->extensions = NULL;
+ __skb_ext_put(ext);
+#ifdef CONFIG_XFRM
+ } else if (id == SKB_EXT_SEC_PATH &&
+ refcount_read(&ext->refcnt) == 1) {
+ struct sec_path *sp = skb_ext_get_ptr(ext, SKB_EXT_SEC_PATH);
+
+ skb_ext_put_sp(sp);
+ sp->len = 0;
+#endif
+ }
+}
+EXPORT_SYMBOL(__skb_ext_del);
+
+void __skb_ext_put(struct skb_ext *ext)
+{
+ /* If this is last clone, nothing can increment
+ * it after check passes. Avoids one atomic op.
+ */
+ if (refcount_read(&ext->refcnt) == 1)
+ goto free_now;
+
+ if (!refcount_dec_and_test(&ext->refcnt))
+ return;
+free_now:
+#ifdef CONFIG_XFRM
+ if (__skb_ext_exist(ext, SKB_EXT_SEC_PATH))
+ skb_ext_put_sp(skb_ext_get_ptr(ext, SKB_EXT_SEC_PATH));
+#endif
+
+ kmem_cache_free(skbuff_ext_cache, ext);
+}
+EXPORT_SYMBOL(__skb_ext_put);
+#endif /* CONFIG_SKB_EXTENSIONS */
diff --git a/net/core/skmsg.c b/net/core/skmsg.c
index abaa5f48d3ec..d6d5c20d7044 100644
--- a/net/core/skmsg.c
+++ b/net/core/skmsg.c
@@ -406,7 +406,7 @@ static int sk_psock_skb_ingress(struct sk_psock *psock, struct sk_buff *skb)
msg->skb = skb;
sk_psock_queue_msg(psock, msg);
- sk->sk_data_ready(sk);
+ sk_psock_data_ready(sk, psock);
return copied;
}
@@ -575,6 +575,7 @@ void sk_psock_drop(struct sock *sk, struct sk_psock *psock)
{
rcu_assign_sk_user_data(sk, NULL);
sk_psock_cork_free(psock);
+ sk_psock_zap_ingress(psock);
sk_psock_restore_proto(sk, psock);
write_lock_bh(&sk->sk_callback_lock);
@@ -672,6 +673,22 @@ static void sk_psock_verdict_apply(struct sk_psock *psock,
bool ingress;
switch (verdict) {
+ case __SK_PASS:
+ sk_other = psock->sk;
+ if (sock_flag(sk_other, SOCK_DEAD) ||
+ !sk_psock_test_state(psock, SK_PSOCK_TX_ENABLED)) {
+ goto out_free;
+ }
+ if (atomic_read(&sk_other->sk_rmem_alloc) <=
+ sk_other->sk_rcvbuf) {
+ struct tcp_skb_cb *tcp = TCP_SKB_CB(skb);
+
+ tcp->bpf.flags |= BPF_F_INGRESS;
+ skb_queue_tail(&psock->ingress_skb, skb);
+ schedule_work(&psock->work);
+ break;
+ }
+ goto out_free;
case __SK_REDIRECT:
sk_other = tcp_skb_bpf_redirect_fetch(skb);
if (unlikely(!sk_other))
@@ -738,7 +755,7 @@ static int sk_psock_strp_parse(struct strparser *strp, struct sk_buff *skb)
}
/* Called with socket lock held. */
-static void sk_psock_data_ready(struct sock *sk)
+static void sk_psock_strp_data_ready(struct sock *sk)
{
struct sk_psock *psock;
@@ -786,7 +803,7 @@ void sk_psock_start_strp(struct sock *sk, struct sk_psock *psock)
return;
parser->saved_data_ready = sk->sk_data_ready;
- sk->sk_data_ready = sk_psock_data_ready;
+ sk->sk_data_ready = sk_psock_strp_data_ready;
sk->sk_write_space = sk_psock_write_space;
parser->enabled = true;
}
diff --git a/net/core/sock.c b/net/core/sock.c
index 080a880a1761..f00902c532cc 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -567,6 +567,8 @@ static int sock_setbindtodevice(struct sock *sk, char __user *optval,
lock_sock(sk);
sk->sk_bound_dev_if = index;
+ if (sk->sk_prot->rehash)
+ sk->sk_prot->rehash(sk);
sk_dst_reset(sk);
release_sock(sk);
@@ -698,6 +700,7 @@ int sock_setsockopt(struct socket *sock, int level, int optname,
break;
case SO_DONTROUTE:
sock_valbool_flag(sk, SOCK_LOCALROUTE, valbool);
+ sk_dst_reset(sk);
break;
case SO_BROADCAST:
sock_valbool_flag(sk, SOCK_BROADCAST, valbool);
@@ -950,10 +953,12 @@ set_rcvbuf:
clear_bit(SOCK_PASSSEC, &sock->flags);
break;
case SO_MARK:
- if (!ns_capable(sock_net(sk)->user_ns, CAP_NET_ADMIN))
+ if (!ns_capable(sock_net(sk)->user_ns, CAP_NET_ADMIN)) {
ret = -EPERM;
- else
+ } else if (val != sk->sk_mark) {
sk->sk_mark = val;
+ sk_dst_reset(sk);
+ }
break;
case SO_RXQ_OVFL:
@@ -1014,7 +1019,10 @@ set_rcvbuf:
case SO_ZEROCOPY:
if (sk->sk_family == PF_INET || sk->sk_family == PF_INET6) {
- if (sk->sk_protocol != IPPROTO_TCP)
+ if (!((sk->sk_type == SOCK_STREAM &&
+ sk->sk_protocol == IPPROTO_TCP) ||
+ (sk->sk_type == SOCK_DGRAM &&
+ sk->sk_protocol == IPPROTO_UDP)))
ret = -ENOTSUPP;
} else if (sk->sk_family != PF_RDS) {
ret = -ENOTSUPP;
diff --git a/net/core/sock_reuseport.c b/net/core/sock_reuseport.c
index ba5cba56f574..d8fe3e549373 100644
--- a/net/core/sock_reuseport.c
+++ b/net/core/sock_reuseport.c
@@ -187,6 +187,7 @@ int reuseport_add_sock(struct sock *sk, struct sock *sk2, bool bind_inany)
call_rcu(&old_reuse->rcu, reuseport_free_rcu);
return 0;
}
+EXPORT_SYMBOL(reuseport_add_sock);
void reuseport_detach_sock(struct sock *sk)
{
diff --git a/net/core/stream.c b/net/core/stream.c
index 7d329fb1f553..e94bb02a5629 100644
--- a/net/core/stream.c
+++ b/net/core/stream.c
@@ -32,7 +32,7 @@ void sk_stream_write_space(struct sock *sk)
struct socket *sock = sk->sk_socket;
struct socket_wq *wq;
- if (sk_stream_is_writeable(sk) && sock) {
+ if (__sk_stream_is_writeable(sk, 1) && sock) {
clear_bit(SOCK_NOSPACE, &sock->flags);
rcu_read_lock();
diff --git a/net/dccp/ipv4.c b/net/dccp/ipv4.c
index 8e08cea6f178..26a21d97b6b0 100644
--- a/net/dccp/ipv4.c
+++ b/net/dccp/ipv4.c
@@ -231,7 +231,7 @@ EXPORT_SYMBOL(dccp_req_err);
* check at all. A more general error queue to queue errors for later handling
* is probably better.
*/
-static void dccp_v4_err(struct sk_buff *skb, u32 info)
+static int dccp_v4_err(struct sk_buff *skb, u32 info)
{
const struct iphdr *iph = (struct iphdr *)skb->data;
const u8 offset = iph->ihl << 2;
@@ -259,16 +259,18 @@ static void dccp_v4_err(struct sk_buff *skb, u32 info)
inet_iif(skb), 0);
if (!sk) {
__ICMP_INC_STATS(net, ICMP_MIB_INERRORS);
- return;
+ return -ENOENT;
}
if (sk->sk_state == DCCP_TIME_WAIT) {
inet_twsk_put(inet_twsk(sk));
- return;
+ return 0;
}
seq = dccp_hdr_seq(dh);
- if (sk->sk_state == DCCP_NEW_SYN_RECV)
- return dccp_req_err(sk, seq);
+ if (sk->sk_state == DCCP_NEW_SYN_RECV) {
+ dccp_req_err(sk, seq);
+ return 0;
+ }
bh_lock_sock(sk);
/* If too many ICMPs get dropped on busy
@@ -357,6 +359,7 @@ static void dccp_v4_err(struct sk_buff *skb, u32 info)
out:
bh_unlock_sock(sk);
sock_put(sk);
+ return 0;
}
static inline __sum16 dccp_v4_csum_finish(struct sk_buff *skb,
diff --git a/net/dccp/ipv6.c b/net/dccp/ipv6.c
index 6344f1b18a6a..d5740bad5b18 100644
--- a/net/dccp/ipv6.c
+++ b/net/dccp/ipv6.c
@@ -68,7 +68,7 @@ static inline __u64 dccp_v6_init_sequence(struct sk_buff *skb)
}
-static void dccp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
+static int dccp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
u8 type, u8 code, int offset, __be32 info)
{
const struct ipv6hdr *hdr = (const struct ipv6hdr *)skb->data;
@@ -96,16 +96,18 @@ static void dccp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
if (!sk) {
__ICMP6_INC_STATS(net, __in6_dev_get(skb->dev),
ICMP6_MIB_INERRORS);
- return;
+ return -ENOENT;
}
if (sk->sk_state == DCCP_TIME_WAIT) {
inet_twsk_put(inet_twsk(sk));
- return;
+ return 0;
}
seq = dccp_hdr_seq(dh);
- if (sk->sk_state == DCCP_NEW_SYN_RECV)
- return dccp_req_err(sk, seq);
+ if (sk->sk_state == DCCP_NEW_SYN_RECV) {
+ dccp_req_err(sk, seq);
+ return 0;
+ }
bh_lock_sock(sk);
if (sock_owned_by_user(sk))
@@ -183,6 +185,7 @@ static void dccp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
out:
bh_unlock_sock(sk);
sock_put(sk);
+ return 0;
}
diff --git a/net/dccp/proto.c b/net/dccp/proto.c
index 43733accf58e..2cc5fbb1b29e 100644
--- a/net/dccp/proto.c
+++ b/net/dccp/proto.c
@@ -948,6 +948,7 @@ int inet_dccp_listen(struct socket *sock, int backlog)
if (!((1 << old_state) & (DCCPF_CLOSED | DCCPF_LISTEN)))
goto out;
+ sk->sk_max_ack_backlog = backlog;
/* Really, if the socket is already in listen state
* we can only allow the backlog to be adjusted.
*/
@@ -960,7 +961,6 @@ int inet_dccp_listen(struct socket *sock, int backlog)
if (err)
goto out;
}
- sk->sk_max_ack_backlog = backlog;
err = 0;
out:
@@ -1139,8 +1139,11 @@ static int __init dccp_init(void)
rc = percpu_counter_init(&dccp_orphan_count, 0, GFP_KERNEL);
if (rc)
goto out_fail;
- rc = -ENOBUFS;
inet_hashinfo_init(&dccp_hashinfo);
+ rc = inet_hashinfo2_init_mod(&dccp_hashinfo);
+ if (rc)
+ goto out_fail;
+ rc = -ENOBUFS;
dccp_hashinfo.bind_bucket_cachep =
kmem_cache_create("dccp_bind_bucket",
sizeof(struct inet_bind_bucket), 0,
diff --git a/net/decnet/af_decnet.c b/net/decnet/af_decnet.c
index dbd0f7bae00a..bdccc46a2921 100644
--- a/net/decnet/af_decnet.c
+++ b/net/decnet/af_decnet.c
@@ -192,7 +192,7 @@ static int check_port(__le16 port)
static unsigned short port_alloc(struct sock *sk)
{
struct dn_scp *scp = DN_SK(sk);
-static unsigned short port = 0x2000;
+ static unsigned short port = 0x2000;
unsigned short i_port = port;
while(check_port(cpu_to_le16(++port)) != 0) {
diff --git a/net/dsa/Kconfig b/net/dsa/Kconfig
index 48c41918fb35..91e52973ee13 100644
--- a/net/dsa/Kconfig
+++ b/net/dsa/Kconfig
@@ -44,6 +44,10 @@ config NET_DSA_TAG_GSWIP
config NET_DSA_TAG_KSZ
bool
+config NET_DSA_TAG_KSZ9477
+ bool
+ select NET_DSA_TAG_KSZ
+
config NET_DSA_TAG_LAN9303
bool
diff --git a/net/dsa/dsa.c b/net/dsa/dsa.c
index a69c1790bbfc..aee909bcddc4 100644
--- a/net/dsa/dsa.c
+++ b/net/dsa/dsa.c
@@ -55,8 +55,8 @@ const struct dsa_device_ops *dsa_device_ops[DSA_TAG_LAST] = {
#ifdef CONFIG_NET_DSA_TAG_GSWIP
[DSA_TAG_PROTO_GSWIP] = &gswip_netdev_ops,
#endif
-#ifdef CONFIG_NET_DSA_TAG_KSZ
- [DSA_TAG_PROTO_KSZ] = &ksz_netdev_ops,
+#ifdef CONFIG_NET_DSA_TAG_KSZ9477
+ [DSA_TAG_PROTO_KSZ9477] = &ksz9477_netdev_ops,
#endif
#ifdef CONFIG_NET_DSA_TAG_LAN9303
[DSA_TAG_PROTO_LAN9303] = &lan9303_netdev_ops,
@@ -91,8 +91,8 @@ const char *dsa_tag_protocol_to_str(const struct dsa_device_ops *ops)
#ifdef CONFIG_NET_DSA_TAG_GSWIP
[DSA_TAG_PROTO_GSWIP] = "gswip",
#endif
-#ifdef CONFIG_NET_DSA_TAG_KSZ
- [DSA_TAG_PROTO_KSZ] = "ksz",
+#ifdef CONFIG_NET_DSA_TAG_KSZ9477
+ [DSA_TAG_PROTO_KSZ9477] = "ksz9477",
#endif
#ifdef CONFIG_NET_DSA_TAG_LAN9303
[DSA_TAG_PROTO_LAN9303] = "lan9303",
diff --git a/net/dsa/dsa_priv.h b/net/dsa/dsa_priv.h
index 9e4fd04ab53c..026a05774bf7 100644
--- a/net/dsa/dsa_priv.h
+++ b/net/dsa/dsa_priv.h
@@ -210,7 +210,7 @@ extern const struct dsa_device_ops edsa_netdev_ops;
extern const struct dsa_device_ops gswip_netdev_ops;
/* tag_ksz.c */
-extern const struct dsa_device_ops ksz_netdev_ops;
+extern const struct dsa_device_ops ksz9477_netdev_ops;
/* tag_lan9303.c */
extern const struct dsa_device_ops lan9303_netdev_ops;
diff --git a/net/dsa/master.c b/net/dsa/master.c
index 5e8c9bef78bd..71bb15f491c8 100644
--- a/net/dsa/master.c
+++ b/net/dsa/master.c
@@ -179,10 +179,38 @@ static const struct attribute_group dsa_group = {
.attrs = dsa_slave_attrs,
};
+static void dsa_master_set_mtu(struct net_device *dev, struct dsa_port *cpu_dp)
+{
+ unsigned int mtu = ETH_DATA_LEN + cpu_dp->tag_ops->overhead;
+ int err;
+
+ rtnl_lock();
+ if (mtu <= dev->max_mtu) {
+ err = dev_set_mtu(dev, mtu);
+ if (err)
+ netdev_dbg(dev, "Unable to set MTU to include for DSA overheads\n");
+ }
+ rtnl_unlock();
+}
+
+static void dsa_master_reset_mtu(struct net_device *dev)
+{
+ int err;
+
+ rtnl_lock();
+ err = dev_set_mtu(dev, ETH_DATA_LEN);
+ if (err)
+ netdev_dbg(dev,
+ "Unable to reset MTU to exclude DSA overheads\n");
+ rtnl_unlock();
+}
+
int dsa_master_setup(struct net_device *dev, struct dsa_port *cpu_dp)
{
int ret;
+ dsa_master_set_mtu(dev, cpu_dp);
+
/* If we use a tagging format that doesn't have an ethertype
* field, make sure that all packets from this point on get
* sent to the tag format's receive function.
@@ -206,6 +234,7 @@ void dsa_master_teardown(struct net_device *dev)
{
sysfs_remove_group(&dev->dev.kobj, &dsa_group);
dsa_master_ethtool_teardown(dev);
+ dsa_master_reset_mtu(dev);
dev->dsa_ptr = NULL;
diff --git a/net/dsa/port.c b/net/dsa/port.c
index ed0595459df1..2d7e01b23572 100644
--- a/net/dsa/port.c
+++ b/net/dsa/port.c
@@ -252,9 +252,6 @@ int dsa_port_vlan_add(struct dsa_port *dp,
.vlan = vlan,
};
- if (netif_is_bridge_master(vlan->obj.orig_dev))
- return -EOPNOTSUPP;
-
if (br_vlan_enabled(dp->bridge_dev))
return dsa_port_notify(dp, DSA_NOTIFIER_VLAN_ADD, &info);
diff --git a/net/dsa/slave.c b/net/dsa/slave.c
index aec78f5aca72..a3fcc1d01615 100644
--- a/net/dsa/slave.c
+++ b/net/dsa/slave.c
@@ -1050,8 +1050,6 @@ static const struct net_device_ops dsa_slave_netdev_ops = {
static const struct switchdev_ops dsa_slave_switchdev_ops = {
.switchdev_port_attr_get = dsa_slave_port_attr_get,
.switchdev_port_attr_set = dsa_slave_port_attr_set,
- .switchdev_port_obj_add = dsa_slave_port_obj_add,
- .switchdev_port_obj_del = dsa_slave_port_obj_del,
};
static struct device_type dsa_type = {
@@ -1529,6 +1527,44 @@ err_fdb_work_init:
return NOTIFY_BAD;
}
+static int
+dsa_slave_switchdev_port_obj_event(unsigned long event,
+ struct net_device *netdev,
+ struct switchdev_notifier_port_obj_info *port_obj_info)
+{
+ int err = -EOPNOTSUPP;
+
+ switch (event) {
+ case SWITCHDEV_PORT_OBJ_ADD:
+ err = dsa_slave_port_obj_add(netdev, port_obj_info->obj,
+ port_obj_info->trans);
+ break;
+ case SWITCHDEV_PORT_OBJ_DEL:
+ err = dsa_slave_port_obj_del(netdev, port_obj_info->obj);
+ break;
+ }
+
+ port_obj_info->handled = true;
+ return notifier_from_errno(err);
+}
+
+static int dsa_slave_switchdev_blocking_event(struct notifier_block *unused,
+ unsigned long event, void *ptr)
+{
+ struct net_device *dev = switchdev_notifier_info_to_dev(ptr);
+
+ if (!dsa_slave_dev_check(dev))
+ return NOTIFY_DONE;
+
+ switch (event) {
+ case SWITCHDEV_PORT_OBJ_ADD: /* fall through */
+ case SWITCHDEV_PORT_OBJ_DEL:
+ return dsa_slave_switchdev_port_obj_event(event, dev, ptr);
+ }
+
+ return NOTIFY_DONE;
+}
+
static struct notifier_block dsa_slave_nb __read_mostly = {
.notifier_call = dsa_slave_netdevice_event,
};
@@ -1537,8 +1573,13 @@ static struct notifier_block dsa_slave_switchdev_notifier = {
.notifier_call = dsa_slave_switchdev_event,
};
+static struct notifier_block dsa_slave_switchdev_blocking_notifier = {
+ .notifier_call = dsa_slave_switchdev_blocking_event,
+};
+
int dsa_slave_register_notifier(void)
{
+ struct notifier_block *nb;
int err;
err = register_netdevice_notifier(&dsa_slave_nb);
@@ -1549,8 +1590,15 @@ int dsa_slave_register_notifier(void)
if (err)
goto err_switchdev_nb;
+ nb = &dsa_slave_switchdev_blocking_notifier;
+ err = register_switchdev_blocking_notifier(nb);
+ if (err)
+ goto err_switchdev_blocking_nb;
+
return 0;
+err_switchdev_blocking_nb:
+ unregister_switchdev_notifier(&dsa_slave_switchdev_notifier);
err_switchdev_nb:
unregister_netdevice_notifier(&dsa_slave_nb);
return err;
@@ -1558,8 +1606,14 @@ err_switchdev_nb:
void dsa_slave_unregister_notifier(void)
{
+ struct notifier_block *nb;
int err;
+ nb = &dsa_slave_switchdev_blocking_notifier;
+ err = unregister_switchdev_blocking_notifier(nb);
+ if (err)
+ pr_err("DSA: failed to unregister switchdev blocking notifier (%d)\n", err);
+
err = unregister_switchdev_notifier(&dsa_slave_switchdev_notifier);
if (err)
pr_err("DSA: failed to unregister switchdev notifier (%d)\n", err);
diff --git a/net/dsa/tag_brcm.c b/net/dsa/tag_brcm.c
index 2b06bb91318b..4aa1d368a5ae 100644
--- a/net/dsa/tag_brcm.c
+++ b/net/dsa/tag_brcm.c
@@ -174,6 +174,7 @@ static struct sk_buff *brcm_tag_rcv(struct sk_buff *skb, struct net_device *dev,
const struct dsa_device_ops brcm_netdev_ops = {
.xmit = brcm_tag_xmit,
.rcv = brcm_tag_rcv,
+ .overhead = BRCM_TAG_LEN,
};
#endif
@@ -196,5 +197,6 @@ static struct sk_buff *brcm_tag_rcv_prepend(struct sk_buff *skb,
const struct dsa_device_ops brcm_prepend_netdev_ops = {
.xmit = brcm_tag_xmit_prepend,
.rcv = brcm_tag_rcv_prepend,
+ .overhead = BRCM_TAG_LEN,
};
#endif
diff --git a/net/dsa/tag_dsa.c b/net/dsa/tag_dsa.c
index cd13cfc542ce..8b2f92e3f3a2 100644
--- a/net/dsa/tag_dsa.c
+++ b/net/dsa/tag_dsa.c
@@ -149,4 +149,5 @@ static struct sk_buff *dsa_rcv(struct sk_buff *skb, struct net_device *dev,
const struct dsa_device_ops dsa_netdev_ops = {
.xmit = dsa_xmit,
.rcv = dsa_rcv,
+ .overhead = DSA_HLEN,
};
diff --git a/net/dsa/tag_edsa.c b/net/dsa/tag_edsa.c
index 4083326b806e..f5b87ee5c94e 100644
--- a/net/dsa/tag_edsa.c
+++ b/net/dsa/tag_edsa.c
@@ -168,4 +168,5 @@ static struct sk_buff *edsa_rcv(struct sk_buff *skb, struct net_device *dev,
const struct dsa_device_ops edsa_netdev_ops = {
.xmit = edsa_xmit,
.rcv = edsa_rcv,
+ .overhead = EDSA_HLEN,
};
diff --git a/net/dsa/tag_gswip.c b/net/dsa/tag_gswip.c
index 49e9b73f1be3..cb6f82ffe5eb 100644
--- a/net/dsa/tag_gswip.c
+++ b/net/dsa/tag_gswip.c
@@ -106,4 +106,5 @@ static struct sk_buff *gswip_tag_rcv(struct sk_buff *skb,
const struct dsa_device_ops gswip_netdev_ops = {
.xmit = gswip_tag_xmit,
.rcv = gswip_tag_rcv,
+ .overhead = GSWIP_RX_HEADER_LEN,
};
diff --git a/net/dsa/tag_ksz.c b/net/dsa/tag_ksz.c
index 0f62effad88f..da71b9e2af52 100644
--- a/net/dsa/tag_ksz.c
+++ b/net/dsa/tag_ksz.c
@@ -14,34 +14,18 @@
#include <net/dsa.h>
#include "dsa_priv.h"
-/* For Ingress (Host -> KSZ), 2 bytes are added before FCS.
- * ---------------------------------------------------------------------------
- * DA(6bytes)|SA(6bytes)|....|Data(nbytes)|tag0(1byte)|tag1(1byte)|FCS(4bytes)
- * ---------------------------------------------------------------------------
- * tag0 : Prioritization (not used now)
- * tag1 : each bit represents port (eg, 0x01=port1, 0x02=port2, 0x10=port5)
- *
- * For Egress (KSZ -> Host), 1 byte is added before FCS.
- * ---------------------------------------------------------------------------
- * DA(6bytes)|SA(6bytes)|....|Data(nbytes)|tag0(1byte)|FCS(4bytes)
- * ---------------------------------------------------------------------------
- * tag0 : zero-based value represents port
- * (eg, 0x00=port1, 0x02=port3, 0x06=port7)
- */
-
-#define KSZ_INGRESS_TAG_LEN 2
-#define KSZ_EGRESS_TAG_LEN 1
+/* Typically only one byte is used for tail tag. */
+#define KSZ_EGRESS_TAG_LEN 1
-static struct sk_buff *ksz_xmit(struct sk_buff *skb, struct net_device *dev)
+static struct sk_buff *ksz_common_xmit(struct sk_buff *skb,
+ struct net_device *dev, int len)
{
- struct dsa_port *dp = dsa_slave_to_port(dev);
struct sk_buff *nskb;
int padlen;
- u8 *tag;
padlen = (skb->len >= ETH_ZLEN) ? 0 : ETH_ZLEN - skb->len;
- if (skb_tailroom(skb) >= padlen + KSZ_INGRESS_TAG_LEN) {
+ if (skb_tailroom(skb) >= padlen + len) {
/* Let dsa_slave_xmit() free skb */
if (__skb_put_padto(skb, skb->len + padlen, false))
return NULL;
@@ -49,7 +33,7 @@ static struct sk_buff *ksz_xmit(struct sk_buff *skb, struct net_device *dev)
nskb = skb;
} else {
nskb = alloc_skb(NET_IP_ALIGN + skb->len +
- padlen + KSZ_INGRESS_TAG_LEN, GFP_ATOMIC);
+ padlen + len, GFP_ATOMIC);
if (!nskb)
return NULL;
skb_reserve(nskb, NET_IP_ALIGN);
@@ -70,33 +54,88 @@ static struct sk_buff *ksz_xmit(struct sk_buff *skb, struct net_device *dev)
consume_skb(skb);
}
- tag = skb_put(nskb, KSZ_INGRESS_TAG_LEN);
- tag[0] = 0;
- tag[1] = 1 << dp->index; /* destination port */
-
return nskb;
}
-static struct sk_buff *ksz_rcv(struct sk_buff *skb, struct net_device *dev,
- struct packet_type *pt)
+static struct sk_buff *ksz_common_rcv(struct sk_buff *skb,
+ struct net_device *dev,
+ unsigned int port, unsigned int len)
{
- u8 *tag;
- int source_port;
+ skb->dev = dsa_master_find_slave(dev, 0, port);
+ if (!skb->dev)
+ return NULL;
- tag = skb_tail_pointer(skb) - KSZ_EGRESS_TAG_LEN;
+ pskb_trim_rcsum(skb, skb->len - len);
- source_port = tag[0] & 7;
+ return skb;
+}
- skb->dev = dsa_master_find_slave(dev, 0, source_port);
- if (!skb->dev)
+/*
+ * For Ingress (Host -> KSZ9477), 2 bytes are added before FCS.
+ * ---------------------------------------------------------------------------
+ * DA(6bytes)|SA(6bytes)|....|Data(nbytes)|tag0(1byte)|tag1(1byte)|FCS(4bytes)
+ * ---------------------------------------------------------------------------
+ * tag0 : Prioritization (not used now)
+ * tag1 : each bit represents port (eg, 0x01=port1, 0x02=port2, 0x10=port5)
+ *
+ * For Egress (KSZ9477 -> Host), 1 byte is added before FCS.
+ * ---------------------------------------------------------------------------
+ * DA(6bytes)|SA(6bytes)|....|Data(nbytes)|tag0(1byte)|FCS(4bytes)
+ * ---------------------------------------------------------------------------
+ * tag0 : zero-based value represents port
+ * (eg, 0x00=port1, 0x02=port3, 0x06=port7)
+ */
+
+#define KSZ9477_INGRESS_TAG_LEN 2
+#define KSZ9477_PTP_TAG_LEN 4
+#define KSZ9477_PTP_TAG_INDICATION 0x80
+
+#define KSZ9477_TAIL_TAG_OVERRIDE BIT(9)
+#define KSZ9477_TAIL_TAG_LOOKUP BIT(10)
+
+static struct sk_buff *ksz9477_xmit(struct sk_buff *skb,
+ struct net_device *dev)
+{
+ struct dsa_port *dp = dsa_slave_to_port(dev);
+ struct sk_buff *nskb;
+ u16 *tag;
+ u8 *addr;
+
+ nskb = ksz_common_xmit(skb, dev, KSZ9477_INGRESS_TAG_LEN);
+ if (!nskb)
return NULL;
- pskb_trim_rcsum(skb, skb->len - KSZ_EGRESS_TAG_LEN);
+ /* Tag encoding */
+ tag = skb_put(nskb, KSZ9477_INGRESS_TAG_LEN);
+ addr = skb_mac_header(nskb);
- return skb;
+ *tag = BIT(dp->index);
+
+ if (is_link_local_ether_addr(addr))
+ *tag |= KSZ9477_TAIL_TAG_OVERRIDE;
+
+ *tag = cpu_to_be16(*tag);
+
+ return nskb;
+}
+
+static struct sk_buff *ksz9477_rcv(struct sk_buff *skb, struct net_device *dev,
+ struct packet_type *pt)
+{
+ /* Tag decoding */
+ u8 *tag = skb_tail_pointer(skb) - KSZ_EGRESS_TAG_LEN;
+ unsigned int port = tag[0] & 7;
+ unsigned int len = KSZ_EGRESS_TAG_LEN;
+
+ /* Extra 4-bytes PTP timestamp */
+ if (tag[0] & KSZ9477_PTP_TAG_INDICATION)
+ len += KSZ9477_PTP_TAG_LEN;
+
+ return ksz_common_rcv(skb, dev, port, len);
}
-const struct dsa_device_ops ksz_netdev_ops = {
- .xmit = ksz_xmit,
- .rcv = ksz_rcv,
+const struct dsa_device_ops ksz9477_netdev_ops = {
+ .xmit = ksz9477_xmit,
+ .rcv = ksz9477_rcv,
+ .overhead = KSZ9477_INGRESS_TAG_LEN,
};
diff --git a/net/dsa/tag_lan9303.c b/net/dsa/tag_lan9303.c
index 548c00254c07..f48889e46ff7 100644
--- a/net/dsa/tag_lan9303.c
+++ b/net/dsa/tag_lan9303.c
@@ -140,4 +140,5 @@ static struct sk_buff *lan9303_rcv(struct sk_buff *skb, struct net_device *dev,
const struct dsa_device_ops lan9303_netdev_ops = {
.xmit = lan9303_xmit,
.rcv = lan9303_rcv,
+ .overhead = LAN9303_TAG_LEN,
};
diff --git a/net/dsa/tag_mtk.c b/net/dsa/tag_mtk.c
index 11535bc70743..f39f4dfeda34 100644
--- a/net/dsa/tag_mtk.c
+++ b/net/dsa/tag_mtk.c
@@ -109,4 +109,5 @@ const struct dsa_device_ops mtk_netdev_ops = {
.xmit = mtk_tag_xmit,
.rcv = mtk_tag_rcv,
.flow_dissect = mtk_tag_flow_dissect,
+ .overhead = MTK_HDR_LEN,
};
diff --git a/net/dsa/tag_qca.c b/net/dsa/tag_qca.c
index 613f4ee97771..ed4f6dc26365 100644
--- a/net/dsa/tag_qca.c
+++ b/net/dsa/tag_qca.c
@@ -101,4 +101,5 @@ static struct sk_buff *qca_tag_rcv(struct sk_buff *skb, struct net_device *dev,
const struct dsa_device_ops qca_netdev_ops = {
.xmit = qca_tag_xmit,
.rcv = qca_tag_rcv,
+ .overhead = QCA_HDR_LEN,
};
diff --git a/net/dsa/tag_trailer.c b/net/dsa/tag_trailer.c
index 56197f0d9608..b40756ed6e57 100644
--- a/net/dsa/tag_trailer.c
+++ b/net/dsa/tag_trailer.c
@@ -84,4 +84,5 @@ static struct sk_buff *trailer_rcv(struct sk_buff *skb, struct net_device *dev,
const struct dsa_device_ops trailer_netdev_ops = {
.xmit = trailer_xmit,
.rcv = trailer_rcv,
+ .overhead = 4,
};
diff --git a/net/ethernet/eth.c b/net/ethernet/eth.c
index fd8faa0dfa61..4c520110b04f 100644
--- a/net/ethernet/eth.c
+++ b/net/ethernet/eth.c
@@ -47,6 +47,7 @@
#include <linux/inet.h>
#include <linux/ip.h>
#include <linux/netdevice.h>
+#include <linux/nvmem-consumer.h>
#include <linux/etherdevice.h>
#include <linux/skbuff.h>
#include <linux/errno.h>
@@ -165,15 +166,17 @@ __be16 eth_type_trans(struct sk_buff *skb, struct net_device *dev)
eth = (struct ethhdr *)skb->data;
skb_pull_inline(skb, ETH_HLEN);
- if (unlikely(is_multicast_ether_addr_64bits(eth->h_dest))) {
- if (ether_addr_equal_64bits(eth->h_dest, dev->broadcast))
- skb->pkt_type = PACKET_BROADCAST;
- else
- skb->pkt_type = PACKET_MULTICAST;
+ if (unlikely(!ether_addr_equal_64bits(eth->h_dest,
+ dev->dev_addr))) {
+ if (unlikely(is_multicast_ether_addr_64bits(eth->h_dest))) {
+ if (ether_addr_equal_64bits(eth->h_dest, dev->broadcast))
+ skb->pkt_type = PACKET_BROADCAST;
+ else
+ skb->pkt_type = PACKET_MULTICAST;
+ } else {
+ skb->pkt_type = PACKET_OTHERHOST;
+ }
}
- else if (unlikely(!ether_addr_equal_64bits(eth->h_dest,
- dev->dev_addr)))
- skb->pkt_type = PACKET_OTHERHOST;
/*
* Some variants of DSA tagging don't have an ethertype field
@@ -548,3 +551,40 @@ int eth_platform_get_mac_address(struct device *dev, u8 *mac_addr)
return 0;
}
EXPORT_SYMBOL(eth_platform_get_mac_address);
+
+/**
+ * Obtain the MAC address from an nvmem cell named 'mac-address' associated
+ * with given device.
+ *
+ * @dev: Device with which the mac-address cell is associated.
+ * @addrbuf: Buffer to which the MAC address will be copied on success.
+ *
+ * Returns 0 on success or a negative error number on failure.
+ */
+int nvmem_get_mac_address(struct device *dev, void *addrbuf)
+{
+ struct nvmem_cell *cell;
+ const void *mac;
+ size_t len;
+
+ cell = nvmem_cell_get(dev, "mac-address");
+ if (IS_ERR(cell))
+ return PTR_ERR(cell);
+
+ mac = nvmem_cell_read(cell, &len);
+ nvmem_cell_put(cell);
+
+ if (IS_ERR(mac))
+ return PTR_ERR(mac);
+
+ if (len != ETH_ALEN || !is_valid_ether_addr(mac)) {
+ kfree(mac);
+ return -EINVAL;
+ }
+
+ ether_addr_copy(addrbuf, mac);
+ kfree(mac);
+
+ return 0;
+}
+EXPORT_SYMBOL(nvmem_get_mac_address);
diff --git a/net/ieee802154/6lowpan/tx.c b/net/ieee802154/6lowpan/tx.c
index ca53efa17be1..8bec827081cd 100644
--- a/net/ieee802154/6lowpan/tx.c
+++ b/net/ieee802154/6lowpan/tx.c
@@ -48,6 +48,9 @@ int lowpan_header_create(struct sk_buff *skb, struct net_device *ldev,
const struct ipv6hdr *hdr = ipv6_hdr(skb);
struct neighbour *n;
+ if (!daddr)
+ return -EINVAL;
+
/* TODO:
* if this package isn't ipv6 one, where should it be routed?
*/
diff --git a/net/ieee802154/nl-phy.c b/net/ieee802154/nl-phy.c
index b231e40f006a..0c25c0bcc4da 100644
--- a/net/ieee802154/nl-phy.c
+++ b/net/ieee802154/nl-phy.c
@@ -242,7 +242,7 @@ int ieee802154_add_iface(struct sk_buff *skb, struct genl_info *info)
* dev_set_mac_address require RTNL_LOCK
*/
rtnl_lock();
- rc = dev_set_mac_address(dev, &addr);
+ rc = dev_set_mac_address(dev, &addr, NULL);
rtnl_unlock();
if (rc)
goto dev_unregister;
diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c
index 1fbe2f815474..0dfb72c46671 100644
--- a/net/ipv4/af_inet.c
+++ b/net/ipv4/af_inet.c
@@ -208,6 +208,7 @@ int inet_listen(struct socket *sock, int backlog)
if (!((1 << old_state) & (TCPF_CLOSE | TCPF_LISTEN)))
goto out;
+ sk->sk_max_ack_backlog = backlog;
/* Really, if the socket is already in listen state
* we can only allow the backlog to be adjusted.
*/
@@ -231,7 +232,6 @@ int inet_listen(struct socket *sock, int backlog)
goto out;
tcp_call_bpf(sk, BPF_SOCK_OPS_TCP_LISTEN_CB, 0, NULL);
}
- sk->sk_max_ack_backlog = backlog;
err = 0;
out:
@@ -1385,6 +1385,10 @@ out:
}
EXPORT_SYMBOL(inet_gso_segment);
+INDIRECT_CALLABLE_DECLARE(struct sk_buff *tcp4_gro_receive(struct list_head *,
+ struct sk_buff *));
+INDIRECT_CALLABLE_DECLARE(struct sk_buff *udp4_gro_receive(struct list_head *,
+ struct sk_buff *));
struct sk_buff *inet_gro_receive(struct list_head *head, struct sk_buff *skb)
{
const struct net_offload *ops;
@@ -1494,7 +1498,8 @@ struct sk_buff *inet_gro_receive(struct list_head *head, struct sk_buff *skb)
skb_gro_pull(skb, sizeof(*iph));
skb_set_transport_header(skb, skb_gro_offset(skb));
- pp = call_gro_receive(ops->callbacks.gro_receive, head, skb);
+ pp = indirect_call_gro_receive(tcp4_gro_receive, udp4_gro_receive,
+ ops->callbacks.gro_receive, head, skb);
out_unlock:
rcu_read_unlock();
@@ -1556,6 +1561,8 @@ int inet_recv_error(struct sock *sk, struct msghdr *msg, int len, int *addr_len)
return -EINVAL;
}
+INDIRECT_CALLABLE_DECLARE(int tcp4_gro_complete(struct sk_buff *, int));
+INDIRECT_CALLABLE_DECLARE(int udp4_gro_complete(struct sk_buff *, int));
int inet_gro_complete(struct sk_buff *skb, int nhoff)
{
__be16 newlen = htons(skb->len - nhoff);
@@ -1581,7 +1588,9 @@ int inet_gro_complete(struct sk_buff *skb, int nhoff)
* because any hdr with option will have been flushed in
* inet_gro_receive().
*/
- err = ops->callbacks.gro_complete(skb, nhoff + sizeof(*iph));
+ err = INDIRECT_CALL_2(ops->callbacks.gro_complete,
+ tcp4_gro_complete, udp4_gro_complete,
+ skb, nhoff + sizeof(*iph));
out_unlock:
rcu_read_unlock();
@@ -1964,6 +1973,8 @@ static int __init inet_init(void)
/* Add UDP-Lite (RFC 3828) */
udplite4_register();
+ raw_init();
+
ping_init();
/*
diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c
index 608a6f4223fb..04ba321ae5ce 100644
--- a/net/ipv4/devinet.c
+++ b/net/ipv4/devinet.c
@@ -1101,7 +1101,7 @@ int devinet_ioctl(struct net *net, unsigned int cmd, struct ifreq *ifr)
inet_del_ifa(in_dev, ifap, 1);
break;
}
- ret = dev_change_flags(dev, ifr->ifr_flags);
+ ret = dev_change_flags(dev, ifr->ifr_flags, NULL);
break;
case SIOCSIFADDR: /* Set interface address (and family) */
diff --git a/net/ipv4/esp4.c b/net/ipv4/esp4.c
index 9e1c840596c5..5459f41fc26f 100644
--- a/net/ipv4/esp4.c
+++ b/net/ipv4/esp4.c
@@ -125,10 +125,13 @@ static void esp_output_done(struct crypto_async_request *base, int err)
void *tmp;
struct xfrm_state *x;
- if (xo && (xo->flags & XFRM_DEV_RESUME))
- x = skb->sp->xvec[skb->sp->len - 1];
- else
+ if (xo && (xo->flags & XFRM_DEV_RESUME)) {
+ struct sec_path *sp = skb_sec_path(skb);
+
+ x = sp->xvec[sp->len - 1];
+ } else {
x = skb_dst(skb)->xfrm;
+ }
tmp = ESP_SKB_CB(skb)->tmp;
esp_ssg_unref(x, tmp);
diff --git a/net/ipv4/esp4_offload.c b/net/ipv4/esp4_offload.c
index 58834a10c0be..8756e0e790d2 100644
--- a/net/ipv4/esp4_offload.c
+++ b/net/ipv4/esp4_offload.c
@@ -46,11 +46,12 @@ static struct sk_buff *esp4_gro_receive(struct list_head *head,
xo = xfrm_offload(skb);
if (!xo || !(xo->flags & CRYPTO_DONE)) {
- err = secpath_set(skb);
- if (err)
+ struct sec_path *sp = secpath_set(skb);
+
+ if (!sp)
goto out;
- if (skb->sp->len == XFRM_MAX_DEPTH)
+ if (sp->len == XFRM_MAX_DEPTH)
goto out;
x = xfrm_state_lookup(dev_net(skb->dev), skb->mark,
@@ -59,8 +60,8 @@ static struct sk_buff *esp4_gro_receive(struct list_head *head,
if (!x)
goto out;
- skb->sp->xvec[skb->sp->len++] = x;
- skb->sp->olen++;
+ sp->xvec[sp->len++] = x;
+ sp->olen++;
xo = xfrm_offload(skb);
if (!xo) {
@@ -114,6 +115,7 @@ static struct sk_buff *esp4_gso_segment(struct sk_buff *skb,
struct crypto_aead *aead;
netdev_features_t esp_features = features;
struct xfrm_offload *xo = xfrm_offload(skb);
+ struct sec_path *sp;
if (!xo)
return ERR_PTR(-EINVAL);
@@ -121,7 +123,8 @@ static struct sk_buff *esp4_gso_segment(struct sk_buff *skb,
if (!(skb_shinfo(skb)->gso_type & SKB_GSO_ESP))
return ERR_PTR(-EINVAL);
- x = skb->sp->xvec[skb->sp->len - 1];
+ sp = skb_sec_path(skb);
+ x = sp->xvec[sp->len - 1];
aead = x->data;
esph = ip_esp_hdr(skb);
diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c
index b5c3937ca6ec..5022bc63863a 100644
--- a/net/ipv4/fib_semantics.c
+++ b/net/ipv4/fib_semantics.c
@@ -1076,7 +1076,7 @@ struct fib_info *fib_create_info(struct fib_config *cfg,
if (!fi)
goto failure;
fi->fib_metrics = ip_fib_metrics_init(fi->fib_net, cfg->fc_mx,
- cfg->fc_mx_len);
+ cfg->fc_mx_len, extack);
if (unlikely(IS_ERR(fi->fib_metrics))) {
err = PTR_ERR(fi->fib_metrics);
kfree(fi);
diff --git a/net/ipv4/fou.c b/net/ipv4/fou.c
index 500a59906b87..0c9f171fb085 100644
--- a/net/ipv4/fou.c
+++ b/net/ipv4/fou.c
@@ -3,6 +3,7 @@
#include <linux/socket.h>
#include <linux/skbuff.h>
#include <linux/ip.h>
+#include <linux/icmp.h>
#include <linux/udp.h>
#include <linux/types.h>
#include <linux/kernel.h>
@@ -1003,15 +1004,89 @@ static int gue_build_header(struct sk_buff *skb, struct ip_tunnel_encap *e,
return 0;
}
+static int gue_err_proto_handler(int proto, struct sk_buff *skb, u32 info)
+{
+ const struct net_protocol *ipprot = rcu_dereference(inet_protos[proto]);
+
+ if (ipprot && ipprot->err_handler) {
+ if (!ipprot->err_handler(skb, info))
+ return 0;
+ }
+
+ return -ENOENT;
+}
+
+static int gue_err(struct sk_buff *skb, u32 info)
+{
+ int transport_offset = skb_transport_offset(skb);
+ struct guehdr *guehdr;
+ size_t optlen;
+ int ret;
+
+ if (skb->len < sizeof(struct udphdr) + sizeof(struct guehdr))
+ return -EINVAL;
+
+ guehdr = (struct guehdr *)&udp_hdr(skb)[1];
+
+ switch (guehdr->version) {
+ case 0: /* Full GUE header present */
+ break;
+ case 1: {
+ /* Direct encasulation of IPv4 or IPv6 */
+ skb_set_transport_header(skb, -(int)sizeof(struct icmphdr));
+
+ switch (((struct iphdr *)guehdr)->version) {
+ case 4:
+ ret = gue_err_proto_handler(IPPROTO_IPIP, skb, info);
+ goto out;
+#if IS_ENABLED(CONFIG_IPV6)
+ case 6:
+ ret = gue_err_proto_handler(IPPROTO_IPV6, skb, info);
+ goto out;
+#endif
+ default:
+ ret = -EOPNOTSUPP;
+ goto out;
+ }
+ }
+ default: /* Undefined version */
+ return -EOPNOTSUPP;
+ }
+
+ if (guehdr->control)
+ return -ENOENT;
+
+ optlen = guehdr->hlen << 2;
+
+ if (validate_gue_flags(guehdr, optlen))
+ return -EINVAL;
+
+ /* Handling exceptions for direct UDP encapsulation in GUE would lead to
+ * recursion. Besides, this kind of encapsulation can't even be
+ * configured currently. Discard this.
+ */
+ if (guehdr->proto_ctype == IPPROTO_UDP)
+ return -EOPNOTSUPP;
+
+ skb_set_transport_header(skb, -(int)sizeof(struct icmphdr));
+ ret = gue_err_proto_handler(guehdr->proto_ctype, skb, info);
+
+out:
+ skb_set_transport_header(skb, transport_offset);
+ return ret;
+}
+
static const struct ip_tunnel_encap_ops fou_iptun_ops = {
.encap_hlen = fou_encap_hlen,
.build_header = fou_build_header,
+ .err_handler = gue_err,
};
static const struct ip_tunnel_encap_ops gue_iptun_ops = {
.encap_hlen = gue_encap_hlen,
.build_header = gue_build_header,
+ .err_handler = gue_err,
};
static int ip_tunnel_encap_add_fou_ops(void)
diff --git a/net/ipv4/gre_demux.c b/net/ipv4/gre_demux.c
index 7efe740c06eb..a4bf22ee3aed 100644
--- a/net/ipv4/gre_demux.c
+++ b/net/ipv4/gre_demux.c
@@ -151,20 +151,25 @@ drop:
return NET_RX_DROP;
}
-static void gre_err(struct sk_buff *skb, u32 info)
+static int gre_err(struct sk_buff *skb, u32 info)
{
const struct gre_protocol *proto;
const struct iphdr *iph = (const struct iphdr *)skb->data;
u8 ver = skb->data[(iph->ihl<<2) + 1]&0x7f;
+ int err = 0;
if (ver >= GREPROTO_MAX)
- return;
+ return -EINVAL;
rcu_read_lock();
proto = rcu_dereference(gre_proto[ver]);
if (proto && proto->err_handler)
proto->err_handler(skb, info);
+ else
+ err = -EPROTONOSUPPORT;
rcu_read_unlock();
+
+ return err;
}
static const struct net_protocol net_gre_protocol = {
diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c
index d832beed6e3a..065997f414e6 100644
--- a/net/ipv4/icmp.c
+++ b/net/ipv4/icmp.c
@@ -1079,7 +1079,7 @@ error:
goto drop;
}
-void icmp_err(struct sk_buff *skb, u32 info)
+int icmp_err(struct sk_buff *skb, u32 info)
{
struct iphdr *iph = (struct iphdr *)skb->data;
int offset = iph->ihl<<2;
@@ -1094,13 +1094,15 @@ void icmp_err(struct sk_buff *skb, u32 info)
*/
if (icmph->type != ICMP_ECHOREPLY) {
ping_err(skb, offset, info);
- return;
+ return 0;
}
if (type == ICMP_DEST_UNREACH && code == ICMP_FRAG_NEEDED)
ipv4_update_pmtu(skb, net, info, 0, IPPROTO_ICMP);
else if (type == ICMP_REDIRECT)
ipv4_redirect(skb, net, 0, IPPROTO_ICMP);
+
+ return 0;
}
/*
diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c
index 15e7f7915a21..6ea523d71947 100644
--- a/net/ipv4/inet_connection_sock.c
+++ b/net/ipv4/inet_connection_sock.c
@@ -183,7 +183,9 @@ inet_csk_find_open_port(struct sock *sk, struct inet_bind_bucket **tb_ret, int *
int i, low, high, attempt_half;
struct inet_bind_bucket *tb;
u32 remaining, offset;
+ int l3mdev;
+ l3mdev = inet_sk_bound_l3mdev(sk);
attempt_half = (sk->sk_reuse == SK_CAN_REUSE) ? 1 : 0;
other_half_scan:
inet_get_local_port_range(net, &low, &high);
@@ -219,7 +221,8 @@ other_parity_scan:
hinfo->bhash_size)];
spin_lock_bh(&head->lock);
inet_bind_bucket_for_each(tb, &head->chain)
- if (net_eq(ib_net(tb), net) && tb->port == port) {
+ if (net_eq(ib_net(tb), net) && tb->l3mdev == l3mdev &&
+ tb->port == port) {
if (!inet_csk_bind_conflict(sk, tb, false, false))
goto success;
goto next_port;
@@ -293,6 +296,9 @@ int inet_csk_get_port(struct sock *sk, unsigned short snum)
struct net *net = sock_net(sk);
struct inet_bind_bucket *tb = NULL;
kuid_t uid = sock_i_uid(sk);
+ int l3mdev;
+
+ l3mdev = inet_sk_bound_l3mdev(sk);
if (!port) {
head = inet_csk_find_open_port(sk, &tb, &port);
@@ -306,11 +312,12 @@ int inet_csk_get_port(struct sock *sk, unsigned short snum)
hinfo->bhash_size)];
spin_lock_bh(&head->lock);
inet_bind_bucket_for_each(tb, &head->chain)
- if (net_eq(ib_net(tb), net) && tb->port == port)
+ if (net_eq(ib_net(tb), net) && tb->l3mdev == l3mdev &&
+ tb->port == port)
goto tb_found;
tb_not_found:
tb = inet_bind_bucket_create(hinfo->bind_bucket_cachep,
- net, head, port);
+ net, head, port, l3mdev);
if (!tb)
goto fail_unlock;
tb_found:
@@ -874,7 +881,6 @@ int inet_csk_listen_start(struct sock *sk, int backlog)
reqsk_queue_alloc(&icsk->icsk_accept_queue);
- sk->sk_max_ack_backlog = backlog;
sk->sk_ack_backlog = 0;
inet_csk_delack_init(sk);
diff --git a/net/ipv4/inet_hashtables.c b/net/ipv4/inet_hashtables.c
index 411dd7a90046..942265d65eb3 100644
--- a/net/ipv4/inet_hashtables.c
+++ b/net/ipv4/inet_hashtables.c
@@ -65,12 +65,14 @@ static u32 sk_ehashfn(const struct sock *sk)
struct inet_bind_bucket *inet_bind_bucket_create(struct kmem_cache *cachep,
struct net *net,
struct inet_bind_hashbucket *head,
- const unsigned short snum)
+ const unsigned short snum,
+ int l3mdev)
{
struct inet_bind_bucket *tb = kmem_cache_alloc(cachep, GFP_ATOMIC);
if (tb) {
write_pnet(&tb->ib_net, net);
+ tb->l3mdev = l3mdev;
tb->port = snum;
tb->fastreuse = 0;
tb->fastreuseport = 0;
@@ -135,6 +137,7 @@ int __inet_inherit_port(const struct sock *sk, struct sock *child)
table->bhash_size);
struct inet_bind_hashbucket *head = &table->bhash[bhash];
struct inet_bind_bucket *tb;
+ int l3mdev;
spin_lock(&head->lock);
tb = inet_csk(sk)->icsk_bind_hash;
@@ -143,6 +146,8 @@ int __inet_inherit_port(const struct sock *sk, struct sock *child)
return -ENOENT;
}
if (tb->port != port) {
+ l3mdev = inet_sk_bound_l3mdev(sk);
+
/* NOTE: using tproxy and redirecting skbs to a proxy
* on a different listener port breaks the assumption
* that the listener socket's icsk_bind_hash is the same
@@ -150,12 +155,13 @@ int __inet_inherit_port(const struct sock *sk, struct sock *child)
* create a new bind bucket for the child here. */
inet_bind_bucket_for_each(tb, &head->chain) {
if (net_eq(ib_net(tb), sock_net(sk)) &&
- tb->port == port)
+ tb->l3mdev == l3mdev && tb->port == port)
break;
}
if (!tb) {
tb = inet_bind_bucket_create(table->bind_bucket_cachep,
- sock_net(sk), head, port);
+ sock_net(sk), head, port,
+ l3mdev);
if (!tb) {
spin_unlock(&head->lock);
return -ENOMEM;
@@ -228,26 +234,16 @@ static inline int compute_score(struct sock *sk, struct net *net,
const int dif, const int sdif, bool exact_dif)
{
int score = -1;
- struct inet_sock *inet = inet_sk(sk);
- if (net_eq(sock_net(sk), net) && inet->inet_num == hnum &&
+ if (net_eq(sock_net(sk), net) && sk->sk_num == hnum &&
!ipv6_only_sock(sk)) {
- __be32 rcv_saddr = inet->inet_rcv_saddr;
+ if (sk->sk_rcv_saddr != daddr)
+ return -1;
+
+ if (!inet_sk_bound_dev_eq(net, sk->sk_bound_dev_if, dif, sdif))
+ return -1;
+
score = sk->sk_family == PF_INET ? 2 : 1;
- if (rcv_saddr) {
- if (rcv_saddr != daddr)
- return -1;
- score += 4;
- }
- if (sk->sk_bound_dev_if || exact_dif) {
- bool dev_match = (sk->sk_bound_dev_if == dif ||
- sk->sk_bound_dev_if == sdif);
-
- if (!dev_match)
- return -1;
- if (sk->sk_bound_dev_if)
- score += 4;
- }
if (sk->sk_incoming_cpu == raw_smp_processor_id())
score++;
}
@@ -303,26 +299,12 @@ struct sock *__inet_lookup_listener(struct net *net,
const __be32 daddr, const unsigned short hnum,
const int dif, const int sdif)
{
- unsigned int hash = inet_lhashfn(net, hnum);
- struct inet_listen_hashbucket *ilb = &hashinfo->listening_hash[hash];
- bool exact_dif = inet_exact_dif_match(net, skb);
struct inet_listen_hashbucket *ilb2;
- struct sock *sk, *result = NULL;
- int score, hiscore = 0;
+ struct sock *result = NULL;
unsigned int hash2;
- u32 phash = 0;
-
- if (ilb->count <= 10 || !hashinfo->lhash2)
- goto port_lookup;
-
- /* Too many sk in the ilb bucket (which is hashed by port alone).
- * Try lhash2 (which is hashed by port and addr) instead.
- */
hash2 = ipv4_portaddr_hash(net, daddr, hnum);
ilb2 = inet_lhash2_bucket(hashinfo, hash2);
- if (ilb2->count > ilb->count)
- goto port_lookup;
result = inet_lhash2_lookup(net, ilb2, skb, doff,
saddr, sport, daddr, hnum,
@@ -331,34 +313,12 @@ struct sock *__inet_lookup_listener(struct net *net,
goto done;
/* Lookup lhash2 with INADDR_ANY */
-
hash2 = ipv4_portaddr_hash(net, htonl(INADDR_ANY), hnum);
ilb2 = inet_lhash2_bucket(hashinfo, hash2);
- if (ilb2->count > ilb->count)
- goto port_lookup;
result = inet_lhash2_lookup(net, ilb2, skb, doff,
- saddr, sport, daddr, hnum,
+ saddr, sport, htonl(INADDR_ANY), hnum,
dif, sdif);
- goto done;
-
-port_lookup:
- sk_for_each_rcu(sk, &ilb->head) {
- score = compute_score(sk, net, hnum, daddr,
- dif, sdif, exact_dif);
- if (score > hiscore) {
- if (sk->sk_reuseport) {
- phash = inet_ehashfn(net, daddr, hnum,
- saddr, sport);
- result = reuseport_select_sock(sk, phash,
- skb, doff);
- if (result)
- goto done;
- }
- result = sk;
- hiscore = score;
- }
- }
done:
if (unlikely(IS_ERR(result)))
return NULL;
@@ -675,6 +635,7 @@ int __inet_hash_connect(struct inet_timewait_death_row *death_row,
u32 remaining, offset;
int ret, i, low, high;
static u32 hint;
+ int l3mdev;
if (port) {
head = &hinfo->bhash[inet_bhashfn(net, port,
@@ -693,6 +654,8 @@ int __inet_hash_connect(struct inet_timewait_death_row *death_row,
return ret;
}
+ l3mdev = inet_sk_bound_l3mdev(sk);
+
inet_get_local_port_range(net, &low, &high);
high++; /* [32768, 60999] -> [32768, 61000[ */
remaining = high - low;
@@ -719,7 +682,8 @@ other_parity_scan:
* the established check is already unique enough.
*/
inet_bind_bucket_for_each(tb, &head->chain) {
- if (net_eq(ib_net(tb), net) && tb->port == port) {
+ if (net_eq(ib_net(tb), net) && tb->l3mdev == l3mdev &&
+ tb->port == port) {
if (tb->fastreuse >= 0 ||
tb->fastreuseport >= 0)
goto next_port;
@@ -732,7 +696,7 @@ other_parity_scan:
}
tb = inet_bind_bucket_create(hinfo->bind_bucket_cachep,
- net, head, port);
+ net, head, port, l3mdev);
if (!tb) {
spin_unlock_bh(&head->lock);
return -ENOMEM;
@@ -798,13 +762,22 @@ void inet_hashinfo_init(struct inet_hashinfo *h)
}
EXPORT_SYMBOL_GPL(inet_hashinfo_init);
+static void init_hashinfo_lhash2(struct inet_hashinfo *h)
+{
+ int i;
+
+ for (i = 0; i <= h->lhash2_mask; i++) {
+ spin_lock_init(&h->lhash2[i].lock);
+ INIT_HLIST_HEAD(&h->lhash2[i].head);
+ h->lhash2[i].count = 0;
+ }
+}
+
void __init inet_hashinfo2_init(struct inet_hashinfo *h, const char *name,
unsigned long numentries, int scale,
unsigned long low_limit,
unsigned long high_limit)
{
- unsigned int i;
-
h->lhash2 = alloc_large_system_hash(name,
sizeof(*h->lhash2),
numentries,
@@ -814,13 +787,23 @@ void __init inet_hashinfo2_init(struct inet_hashinfo *h, const char *name,
&h->lhash2_mask,
low_limit,
high_limit);
+ init_hashinfo_lhash2(h);
+}
- for (i = 0; i <= h->lhash2_mask; i++) {
- spin_lock_init(&h->lhash2[i].lock);
- INIT_HLIST_HEAD(&h->lhash2[i].head);
- h->lhash2[i].count = 0;
- }
+int inet_hashinfo2_init_mod(struct inet_hashinfo *h)
+{
+ h->lhash2 = kmalloc_array(INET_LHTABLE_SIZE, sizeof(*h->lhash2), GFP_KERNEL);
+ if (!h->lhash2)
+ return -ENOMEM;
+
+ h->lhash2_mask = INET_LHTABLE_SIZE - 1;
+ /* INET_LHTABLE_SIZE must be a power of 2 */
+ BUG_ON(INET_LHTABLE_SIZE & h->lhash2_mask);
+
+ init_hashinfo_lhash2(h);
+ return 0;
}
+EXPORT_SYMBOL_GPL(inet_hashinfo2_init_mod);
int inet_ehash_locks_alloc(struct inet_hashinfo *hashinfo)
{
diff --git a/net/ipv4/ip_forward.c b/net/ipv4/ip_forward.c
index d5984d31ab93..00ec819f949b 100644
--- a/net/ipv4/ip_forward.c
+++ b/net/ipv4/ip_forward.c
@@ -69,6 +69,13 @@ static int ip_forward_finish(struct net *net, struct sock *sk, struct sk_buff *s
__IP_INC_STATS(net, IPSTATS_MIB_OUTFORWDATAGRAMS);
__IP_ADD_STATS(net, IPSTATS_MIB_OUTOCTETS, skb->len);
+#ifdef CONFIG_NET_SWITCHDEV
+ if (skb->offload_l3_fwd_mark) {
+ consume_skb(skb);
+ return 0;
+ }
+#endif
+
if (unlikely(opt->optlen))
ip_forward_options(skb);
diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c
index 38befe829caf..c7a7bd58a23c 100644
--- a/net/ipv4/ip_gre.c
+++ b/net/ipv4/ip_gre.c
@@ -121,8 +121,8 @@ static unsigned int ipgre_net_id __read_mostly;
static unsigned int gre_tap_net_id __read_mostly;
static unsigned int erspan_net_id __read_mostly;
-static void ipgre_err(struct sk_buff *skb, u32 info,
- const struct tnl_ptk_info *tpi)
+static int ipgre_err(struct sk_buff *skb, u32 info,
+ const struct tnl_ptk_info *tpi)
{
/* All the routers (except for Linux) return only
@@ -146,17 +146,32 @@ static void ipgre_err(struct sk_buff *skb, u32 info,
unsigned int data_len = 0;
struct ip_tunnel *t;
+ if (tpi->proto == htons(ETH_P_TEB))
+ itn = net_generic(net, gre_tap_net_id);
+ else if (tpi->proto == htons(ETH_P_ERSPAN) ||
+ tpi->proto == htons(ETH_P_ERSPAN2))
+ itn = net_generic(net, erspan_net_id);
+ else
+ itn = net_generic(net, ipgre_net_id);
+
+ iph = (const struct iphdr *)(icmp_hdr(skb) + 1);
+ t = ip_tunnel_lookup(itn, skb->dev->ifindex, tpi->flags,
+ iph->daddr, iph->saddr, tpi->key);
+
+ if (!t)
+ return -ENOENT;
+
switch (type) {
default:
case ICMP_PARAMETERPROB:
- return;
+ return 0;
case ICMP_DEST_UNREACH:
switch (code) {
case ICMP_SR_FAILED:
case ICMP_PORT_UNREACH:
/* Impossible event. */
- return;
+ return 0;
default:
/* All others are translated to HOST_UNREACH.
rfc2003 contains "deep thoughts" about NET_UNREACH,
@@ -168,7 +183,7 @@ static void ipgre_err(struct sk_buff *skb, u32 info,
case ICMP_TIME_EXCEEDED:
if (code != ICMP_EXC_TTL)
- return;
+ return 0;
data_len = icmp_hdr(skb)->un.reserved[1] * 4; /* RFC 4884 4.1 */
break;
@@ -176,40 +191,27 @@ static void ipgre_err(struct sk_buff *skb, u32 info,
break;
}
- if (tpi->proto == htons(ETH_P_TEB))
- itn = net_generic(net, gre_tap_net_id);
- else if (tpi->proto == htons(ETH_P_ERSPAN) ||
- tpi->proto == htons(ETH_P_ERSPAN2))
- itn = net_generic(net, erspan_net_id);
- else
- itn = net_generic(net, ipgre_net_id);
-
- iph = (const struct iphdr *)(icmp_hdr(skb) + 1);
- t = ip_tunnel_lookup(itn, skb->dev->ifindex, tpi->flags,
- iph->daddr, iph->saddr, tpi->key);
-
- if (!t)
- return;
-
#if IS_ENABLED(CONFIG_IPV6)
if (tpi->proto == htons(ETH_P_IPV6) &&
!ip6_err_gen_icmpv6_unreach(skb, iph->ihl * 4 + tpi->hdr_len,
type, data_len))
- return;
+ return 0;
#endif
if (t->parms.iph.daddr == 0 ||
ipv4_is_multicast(t->parms.iph.daddr))
- return;
+ return 0;
if (t->parms.iph.ttl == 0 && type == ICMP_TIME_EXCEEDED)
- return;
+ return 0;
if (time_before(jiffies, t->err_time + IPTUNNEL_ERR_TIMEO))
t->err_count++;
else
t->err_count = 1;
t->err_time = jiffies;
+
+ return 0;
}
static void gre_err(struct sk_buff *skb, u32 info)
@@ -1339,12 +1341,6 @@ static void ipgre_tap_setup(struct net_device *dev)
ip_tunnel_setup(dev, gre_tap_net_id);
}
-bool is_gretap_dev(const struct net_device *dev)
-{
- return dev->netdev_ops == &gre_tap_netdev_ops;
-}
-EXPORT_SYMBOL_GPL(is_gretap_dev);
-
static int ipgre_newlink(struct net *src_net, struct net_device *dev,
struct nlattr *tb[], struct nlattr *data[],
struct netlink_ext_ack *extack)
@@ -1601,7 +1597,7 @@ struct net_device *gretap_fb_dev_create(struct net *net, const char *name,
memset(&tb, 0, sizeof(tb));
dev = rtnl_create_link(net, name, name_assign_type,
- &ipgre_tap_ops, tb);
+ &ipgre_tap_ops, tb, NULL);
if (IS_ERR(dev))
return dev;
diff --git a/net/ipv4/ip_input.c b/net/ipv4/ip_input.c
index e609b08c9df4..26921f6b3b92 100644
--- a/net/ipv4/ip_input.c
+++ b/net/ipv4/ip_input.c
@@ -188,51 +188,50 @@ bool ip_call_ra_chain(struct sk_buff *skb)
return false;
}
-static int ip_local_deliver_finish(struct net *net, struct sock *sk, struct sk_buff *skb)
+void ip_protocol_deliver_rcu(struct net *net, struct sk_buff *skb, int protocol)
{
- __skb_pull(skb, skb_network_header_len(skb));
-
- rcu_read_lock();
- {
- int protocol = ip_hdr(skb)->protocol;
- const struct net_protocol *ipprot;
- int raw;
+ const struct net_protocol *ipprot;
+ int raw, ret;
- resubmit:
- raw = raw_local_deliver(skb, protocol);
+resubmit:
+ raw = raw_local_deliver(skb, protocol);
- ipprot = rcu_dereference(inet_protos[protocol]);
- if (ipprot) {
- int ret;
-
- if (!ipprot->no_policy) {
- if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb)) {
- kfree_skb(skb);
- goto out;
- }
- nf_reset(skb);
+ ipprot = rcu_dereference(inet_protos[protocol]);
+ if (ipprot) {
+ if (!ipprot->no_policy) {
+ if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb)) {
+ kfree_skb(skb);
+ return;
}
- ret = ipprot->handler(skb);
- if (ret < 0) {
- protocol = -ret;
- goto resubmit;
+ nf_reset(skb);
+ }
+ ret = ipprot->handler(skb);
+ if (ret < 0) {
+ protocol = -ret;
+ goto resubmit;
+ }
+ __IP_INC_STATS(net, IPSTATS_MIB_INDELIVERS);
+ } else {
+ if (!raw) {
+ if (xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb)) {
+ __IP_INC_STATS(net, IPSTATS_MIB_INUNKNOWNPROTOS);
+ icmp_send(skb, ICMP_DEST_UNREACH,
+ ICMP_PROT_UNREACH, 0);
}
- __IP_INC_STATS(net, IPSTATS_MIB_INDELIVERS);
+ kfree_skb(skb);
} else {
- if (!raw) {
- if (xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb)) {
- __IP_INC_STATS(net, IPSTATS_MIB_INUNKNOWNPROTOS);
- icmp_send(skb, ICMP_DEST_UNREACH,
- ICMP_PROT_UNREACH, 0);
- }
- kfree_skb(skb);
- } else {
- __IP_INC_STATS(net, IPSTATS_MIB_INDELIVERS);
- consume_skb(skb);
- }
+ __IP_INC_STATS(net, IPSTATS_MIB_INDELIVERS);
+ consume_skb(skb);
}
}
- out:
+}
+
+static int ip_local_deliver_finish(struct net *net, struct sock *sk, struct sk_buff *skb)
+{
+ __skb_pull(skb, skb_network_header_len(skb));
+
+ rcu_read_lock();
+ ip_protocol_deliver_rcu(net, skb, ip_hdr(skb)->protocol);
rcu_read_unlock();
return 0;
diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c
index 5dbec21856f4..c80188875f39 100644
--- a/net/ipv4/ip_output.c
+++ b/net/ipv4/ip_output.c
@@ -533,6 +533,7 @@ static void ip_copy_metadata(struct sk_buff *to, struct sk_buff *from)
to->tc_index = from->tc_index;
#endif
nf_copy(to, from);
+ skb_ext_copy(to, from);
#if IS_ENABLED(CONFIG_IP_VS)
to->ipvs_property = from->ipvs_property;
#endif
@@ -867,6 +868,7 @@ static int __ip_append_data(struct sock *sk,
unsigned int flags)
{
struct inet_sock *inet = inet_sk(sk);
+ struct ubuf_info *uarg = NULL;
struct sk_buff *skb;
struct ip_options *opt = cork->opt;
@@ -880,8 +882,8 @@ static int __ip_append_data(struct sock *sk,
int csummode = CHECKSUM_NONE;
struct rtable *rt = (struct rtable *)cork->dst;
unsigned int wmem_alloc_delta = 0;
+ bool paged, extra_uref;
u32 tskey = 0;
- bool paged;
skb = skb_peek_tail(queue);
@@ -916,6 +918,20 @@ static int __ip_append_data(struct sock *sk,
(!exthdrlen || (rt->dst.dev->features & NETIF_F_HW_ESP_TX_CSUM)))
csummode = CHECKSUM_PARTIAL;
+ if (flags & MSG_ZEROCOPY && length && sock_flag(sk, SOCK_ZEROCOPY)) {
+ uarg = sock_zerocopy_realloc(sk, length, skb_zcopy(skb));
+ if (!uarg)
+ return -ENOBUFS;
+ extra_uref = true;
+ if (rt->dst.dev->features & NETIF_F_SG &&
+ csummode == CHECKSUM_PARTIAL) {
+ paged = true;
+ } else {
+ uarg->zerocopy = 0;
+ skb_zcopy_set(skb, uarg, &extra_uref);
+ }
+ }
+
cork->length += length;
/* So, what's going on in the loop below?
@@ -1001,12 +1017,6 @@ alloc_new_skb:
skb->csum = 0;
skb_reserve(skb, hh_len);
- /* only the initial fragment is time stamped */
- skb_shinfo(skb)->tx_flags = cork->tx_flags;
- cork->tx_flags = 0;
- skb_shinfo(skb)->tskey = tskey;
- tskey = 0;
-
/*
* Find where to start putting bytes.
*/
@@ -1039,6 +1049,13 @@ alloc_new_skb:
exthdrlen = 0;
csummode = CHECKSUM_NONE;
+ /* only the initial fragment is time stamped */
+ skb_shinfo(skb)->tx_flags = cork->tx_flags;
+ cork->tx_flags = 0;
+ skb_shinfo(skb)->tskey = tskey;
+ tskey = 0;
+ skb_zcopy_set(skb, uarg, &extra_uref);
+
if ((flags & MSG_CONFIRM) && !skb_prev)
skb_set_dst_pending_confirm(skb, 1);
@@ -1068,7 +1085,7 @@ alloc_new_skb:
err = -EFAULT;
goto error;
}
- } else {
+ } else if (!uarg || !uarg->zerocopy) {
int i = skb_shinfo(skb)->nr_frags;
err = -ENOMEM;
@@ -1098,6 +1115,10 @@ alloc_new_skb:
skb->data_len += copy;
skb->truesize += copy;
wmem_alloc_delta += copy;
+ } else {
+ err = skb_zerocopy_iter_dgram(skb, from, copy);
+ if (err < 0)
+ goto error;
}
offset += copy;
length -= copy;
@@ -1110,6 +1131,8 @@ alloc_new_skb:
error_efault:
err = -EFAULT;
error:
+ if (uarg)
+ sock_zerocopy_put_abort(uarg, extra_uref);
cork->length -= length;
IP_INC_STATS(sock_net(sk), IPSTATS_MIB_OUTDISCARDS);
refcount_add(wmem_alloc_delta, &sk->sk_wmem_alloc);
diff --git a/net/ipv4/ip_tunnel_core.c b/net/ipv4/ip_tunnel_core.c
index c248e0dccbe1..9a0e67b52a4e 100644
--- a/net/ipv4/ip_tunnel_core.c
+++ b/net/ipv4/ip_tunnel_core.c
@@ -120,7 +120,7 @@ int __iptunnel_pull_header(struct sk_buff *skb, int hdr_len,
}
skb_clear_hash_if_not_l4(skb);
- skb->vlan_tci = 0;
+ __vlan_hwaccel_clear_tag(skb);
skb_set_queue_mapping(skb, 0);
skb_scrub_packet(skb, xnet);
@@ -151,6 +151,7 @@ struct metadata_dst *iptunnel_metadata_reply(struct metadata_dst *md,
sizeof(struct in6_addr));
else
dst->key.u.ipv4.dst = src->key.u.ipv4.src;
+ dst->key.tun_flags = src->key.tun_flags;
dst->mode = src->mode | IP_TUNNEL_INFO_TX;
return res;
diff --git a/net/ipv4/ipconfig.c b/net/ipv4/ipconfig.c
index 2393e5c106bf..b9a9873c25c6 100644
--- a/net/ipv4/ipconfig.c
+++ b/net/ipv4/ipconfig.c
@@ -220,7 +220,7 @@ static int __init ic_open_devs(void)
for_each_netdev(&init_net, dev) {
if (!(dev->flags & IFF_LOOPBACK) && !netdev_uses_dsa(dev))
continue;
- if (dev_change_flags(dev, dev->flags | IFF_UP) < 0)
+ if (dev_change_flags(dev, dev->flags | IFF_UP, NULL) < 0)
pr_err("IP-Config: Failed to open %s\n", dev->name);
}
@@ -238,7 +238,7 @@ static int __init ic_open_devs(void)
if (ic_proto_enabled && !able)
continue;
oflags = dev->flags;
- if (dev_change_flags(dev, oflags | IFF_UP) < 0) {
+ if (dev_change_flags(dev, oflags | IFF_UP, NULL) < 0) {
pr_err("IP-Config: Failed to open %s\n",
dev->name);
continue;
@@ -315,7 +315,7 @@ static void __init ic_close_devs(void)
dev = d->dev;
if (d != ic_dev && !netdev_uses_dsa(dev)) {
pr_debug("IP-Config: Downing %s\n", dev->name);
- dev_change_flags(dev, d->flags);
+ dev_change_flags(dev, d->flags, NULL);
}
kfree(d);
}
@@ -1363,18 +1363,7 @@ static int ntp_servers_seq_show(struct seq_file *seq, void *v)
}
return 0;
}
-
-static int ntp_servers_seq_open(struct inode *inode, struct file *file)
-{
- return single_open(file, ntp_servers_seq_show, NULL);
-}
-
-static const struct file_operations ntp_servers_seq_fops = {
- .open = ntp_servers_seq_open,
- .read = seq_read,
- .llseek = seq_lseek,
- .release = single_release,
-};
+DEFINE_SHOW_ATTRIBUTE(ntp_servers_seq);
#endif /* CONFIG_PROC_FS */
/*
diff --git a/net/ipv4/ipip.c b/net/ipv4/ipip.c
index e65287c27e3d..57c5dd283a2c 100644
--- a/net/ipv4/ipip.c
+++ b/net/ipv4/ipip.c
@@ -140,6 +140,13 @@ static int ipip_err(struct sk_buff *skb, u32 info)
struct ip_tunnel *t;
int err = 0;
+ t = ip_tunnel_lookup(itn, skb->dev->ifindex, TUNNEL_NO_KEY,
+ iph->daddr, iph->saddr, 0);
+ if (!t) {
+ err = -ENOENT;
+ goto out;
+ }
+
switch (type) {
case ICMP_DEST_UNREACH:
switch (code) {
@@ -167,13 +174,6 @@ static int ipip_err(struct sk_buff *skb, u32 info)
goto out;
}
- t = ip_tunnel_lookup(itn, skb->dev->ifindex, TUNNEL_NO_KEY,
- iph->daddr, iph->saddr, 0);
- if (!t) {
- err = -ENOENT;
- goto out;
- }
-
if (type == ICMP_DEST_UNREACH && code == ICMP_FRAG_NEEDED) {
ipv4_update_pmtu(skb, net, info, t->parms.link, iph->protocol);
goto out;
diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c
index e7a3879cedd0..ddbf8c9a1abb 100644
--- a/net/ipv4/ipmr.c
+++ b/net/ipv4/ipmr.c
@@ -508,7 +508,7 @@ static struct net_device *ipmr_new_tunnel(struct net *net, struct vifctl *v)
dev->flags |= IFF_MULTICAST;
if (!ipmr_init_vif_indev(dev))
goto failure;
- if (dev_open(dev))
+ if (dev_open(dev, NULL))
goto failure;
dev_hold(dev);
}
@@ -591,7 +591,7 @@ static struct net_device *ipmr_reg_vif(struct net *net, struct mr_table *mrt)
if (!ipmr_init_vif_indev(dev))
goto failure;
- if (dev_open(dev))
+ if (dev_open(dev, NULL))
goto failure;
dev_hold(dev);
@@ -1806,7 +1806,7 @@ static bool ipmr_forward_offloaded(struct sk_buff *skb, struct mr_table *mrt,
struct vif_device *out_vif = &mrt->vif_table[out_vifi];
struct vif_device *in_vif = &mrt->vif_table[in_vifi];
- if (!skb->offload_mr_fwd_mark)
+ if (!skb->offload_l3_fwd_mark)
return false;
if (!out_vif->dev_parent_id.id_len || !in_vif->dev_parent_id.id_len)
return false;
@@ -1824,8 +1824,7 @@ static bool ipmr_forward_offloaded(struct sk_buff *skb, struct mr_table *mrt,
/* Processing handlers for ipmr_forward */
static void ipmr_queue_xmit(struct net *net, struct mr_table *mrt,
- int in_vifi, struct sk_buff *skb,
- struct mfc_cache *c, int vifi)
+ int in_vifi, struct sk_buff *skb, int vifi)
{
const struct iphdr *iph = ip_hdr(skb);
struct vif_device *vif = &mrt->vif_table[vifi];
@@ -2031,7 +2030,7 @@ forward:
if (skb2)
ipmr_queue_xmit(net, mrt, true_vifi,
- skb2, c, psend);
+ skb2, psend);
}
psend = ct;
}
@@ -2043,9 +2042,9 @@ last_forward:
if (skb2)
ipmr_queue_xmit(net, mrt, true_vifi, skb2,
- c, psend);
+ psend);
} else {
- ipmr_queue_xmit(net, mrt, true_vifi, skb, c, psend);
+ ipmr_queue_xmit(net, mrt, true_vifi, skb, psend);
return;
}
}
diff --git a/net/ipv4/metrics.c b/net/ipv4/metrics.c
index 6d218f5a2e71..ca9a5fefdefa 100644
--- a/net/ipv4/metrics.c
+++ b/net/ipv4/metrics.c
@@ -6,7 +6,8 @@
#include <net/tcp.h>
static int ip_metrics_convert(struct net *net, struct nlattr *fc_mx,
- int fc_mx_len, u32 *metrics)
+ int fc_mx_len, u32 *metrics,
+ struct netlink_ext_ack *extack)
{
bool ecn_ca = false;
struct nlattr *nla;
@@ -21,19 +22,26 @@ static int ip_metrics_convert(struct net *net, struct nlattr *fc_mx,
if (!type)
continue;
- if (type > RTAX_MAX)
+ if (type > RTAX_MAX) {
+ NL_SET_ERR_MSG(extack, "Invalid metric type");
return -EINVAL;
+ }
if (type == RTAX_CC_ALGO) {
char tmp[TCP_CA_NAME_MAX];
nla_strlcpy(tmp, nla, sizeof(tmp));
val = tcp_ca_get_key_by_name(net, tmp, &ecn_ca);
- if (val == TCP_CA_UNSPEC)
+ if (val == TCP_CA_UNSPEC) {
+ NL_SET_ERR_MSG(extack, "Unknown tcp congestion algorithm");
return -EINVAL;
+ }
} else {
- if (nla_len(nla) != sizeof(u32))
+ if (nla_len(nla) != sizeof(u32)) {
+ NL_SET_ERR_MSG_ATTR(extack, nla,
+ "Invalid attribute in metrics");
return -EINVAL;
+ }
val = nla_get_u32(nla);
}
if (type == RTAX_ADVMSS && val > 65535 - 40)
@@ -42,8 +50,10 @@ static int ip_metrics_convert(struct net *net, struct nlattr *fc_mx,
val = 65535 - 15;
if (type == RTAX_HOPLIMIT && val > 255)
val = 255;
- if (type == RTAX_FEATURES && (val & ~RTAX_FEATURE_MASK))
+ if (type == RTAX_FEATURES && (val & ~RTAX_FEATURE_MASK)) {
+ NL_SET_ERR_MSG(extack, "Unknown flag set in feature mask in metrics attribute");
return -EINVAL;
+ }
metrics[type - 1] = val;
}
@@ -54,7 +64,8 @@ static int ip_metrics_convert(struct net *net, struct nlattr *fc_mx,
}
struct dst_metrics *ip_fib_metrics_init(struct net *net, struct nlattr *fc_mx,
- int fc_mx_len)
+ int fc_mx_len,
+ struct netlink_ext_ack *extack)
{
struct dst_metrics *fib_metrics;
int err;
@@ -66,7 +77,8 @@ struct dst_metrics *ip_fib_metrics_init(struct net *net, struct nlattr *fc_mx,
if (unlikely(!fib_metrics))
return ERR_PTR(-ENOMEM);
- err = ip_metrics_convert(net, fc_mx, fc_mx_len, fib_metrics->metrics);
+ err = ip_metrics_convert(net, fc_mx, fc_mx_len, fib_metrics->metrics,
+ extack);
if (!err) {
refcount_set(&fib_metrics->refcnt, 1);
} else {
diff --git a/net/ipv4/netfilter/Kconfig b/net/ipv4/netfilter/Kconfig
index 184bf2e0a1ed..80f72cc5ca8d 100644
--- a/net/ipv4/netfilter/Kconfig
+++ b/net/ipv4/netfilter/Kconfig
@@ -156,15 +156,10 @@ config NF_NAT_SNMP_BASIC
To compile it as a module, choose M here. If unsure, say N.
-config NF_NAT_PROTO_GRE
- tristate
- depends on NF_CT_PROTO_GRE
-
config NF_NAT_PPTP
tristate
depends on NF_CONNTRACK
default NF_CONNTRACK_PPTP
- select NF_NAT_PROTO_GRE
config NF_NAT_H323
tristate
diff --git a/net/ipv4/netfilter/Makefile b/net/ipv4/netfilter/Makefile
index 367993adf4d3..fd7122e0e2c9 100644
--- a/net/ipv4/netfilter/Makefile
+++ b/net/ipv4/netfilter/Makefile
@@ -3,7 +3,7 @@
# Makefile for the netfilter modules on top of IPv4.
#
-nf_nat_ipv4-y := nf_nat_l3proto_ipv4.o nf_nat_proto_icmp.o
+nf_nat_ipv4-y := nf_nat_l3proto_ipv4.o
nf_nat_ipv4-$(CONFIG_NF_NAT_MASQUERADE_IPV4) += nf_nat_masquerade_ipv4.o
obj-$(CONFIG_NF_NAT_IPV4) += nf_nat_ipv4.o
@@ -28,9 +28,6 @@ nf_nat_snmp_basic-y := nf_nat_snmp_basic.asn1.o nf_nat_snmp_basic_main.o
$(obj)/nf_nat_snmp_basic_main.o: $(obj)/nf_nat_snmp_basic.asn1.h
obj-$(CONFIG_NF_NAT_SNMP_BASIC) += nf_nat_snmp_basic.o
-# NAT protocols (nf_nat)
-obj-$(CONFIG_NF_NAT_PROTO_GRE) += nf_nat_proto_gre.o
-
obj-$(CONFIG_NFT_CHAIN_ROUTE_IPV4) += nft_chain_route_ipv4.o
obj-$(CONFIG_NFT_CHAIN_NAT_IPV4) += nft_chain_nat_ipv4.o
obj-$(CONFIG_NFT_REJECT_IPV4) += nft_reject_ipv4.o
diff --git a/net/ipv4/netfilter/ipt_CLUSTERIP.c b/net/ipv4/netfilter/ipt_CLUSTERIP.c
index 2c8d313ae216..b61977db9b7f 100644
--- a/net/ipv4/netfilter/ipt_CLUSTERIP.c
+++ b/net/ipv4/netfilter/ipt_CLUSTERIP.c
@@ -56,18 +56,15 @@ struct clusterip_config {
#endif
enum clusterip_hashmode hash_mode; /* which hashing mode */
u_int32_t hash_initval; /* hash initialization */
- struct rcu_head rcu;
-
+ struct rcu_head rcu; /* for call_rcu_bh */
+ struct net *net; /* netns for pernet list */
char ifname[IFNAMSIZ]; /* device ifname */
- struct notifier_block notifier; /* refresh c->ifindex in it */
};
#ifdef CONFIG_PROC_FS
static const struct file_operations clusterip_proc_fops;
#endif
-static unsigned int clusterip_net_id __read_mostly;
-
struct clusterip_net {
struct list_head configs;
/* lock protects the configs list */
@@ -75,51 +72,66 @@ struct clusterip_net {
#ifdef CONFIG_PROC_FS
struct proc_dir_entry *procdir;
+ /* mutex protects the config->pde*/
+ struct mutex mutex;
#endif
};
+static unsigned int clusterip_net_id __read_mostly;
+static inline struct clusterip_net *clusterip_pernet(struct net *net)
+{
+ return net_generic(net, clusterip_net_id);
+}
+
static inline void
clusterip_config_get(struct clusterip_config *c)
{
refcount_inc(&c->refcount);
}
-
static void clusterip_config_rcu_free(struct rcu_head *head)
{
- kfree(container_of(head, struct clusterip_config, rcu));
+ struct clusterip_config *config;
+ struct net_device *dev;
+
+ config = container_of(head, struct clusterip_config, rcu);
+ dev = dev_get_by_name(config->net, config->ifname);
+ if (dev) {
+ dev_mc_del(dev, config->clustermac);
+ dev_put(dev);
+ }
+ kfree(config);
}
static inline void
clusterip_config_put(struct clusterip_config *c)
{
if (refcount_dec_and_test(&c->refcount))
- call_rcu_bh(&c->rcu, clusterip_config_rcu_free);
+ call_rcu(&c->rcu, clusterip_config_rcu_free);
}
/* decrease the count of entries using/referencing this config. If last
* entry(rule) is removed, remove the config from lists, but don't free it
* yet, since proc-files could still be holding references */
static inline void
-clusterip_config_entry_put(struct net *net, struct clusterip_config *c)
+clusterip_config_entry_put(struct clusterip_config *c)
{
- struct clusterip_net *cn = net_generic(net, clusterip_net_id);
+ struct clusterip_net *cn = clusterip_pernet(c->net);
local_bh_disable();
if (refcount_dec_and_lock(&c->entries, &cn->lock)) {
+ list_del_rcu(&c->list);
+ spin_unlock(&cn->lock);
+ local_bh_enable();
/* In case anyone still accesses the file, the open/close
* functions are also incrementing the refcount on their own,
* so it's safe to remove the entry even if it's in use. */
#ifdef CONFIG_PROC_FS
+ mutex_lock(&cn->mutex);
if (cn->procdir)
proc_remove(c->pde);
+ mutex_unlock(&cn->mutex);
#endif
- list_del_rcu(&c->list);
- spin_unlock(&cn->lock);
- local_bh_enable();
-
- unregister_netdevice_notifier(&c->notifier);
-
return;
}
local_bh_enable();
@@ -129,7 +141,7 @@ static struct clusterip_config *
__clusterip_config_find(struct net *net, __be32 clusterip)
{
struct clusterip_config *c;
- struct clusterip_net *cn = net_generic(net, clusterip_net_id);
+ struct clusterip_net *cn = clusterip_pernet(net);
list_for_each_entry_rcu(c, &cn->configs, list) {
if (c->clusterip == clusterip)
@@ -181,32 +193,37 @@ clusterip_netdev_event(struct notifier_block *this, unsigned long event,
void *ptr)
{
struct net_device *dev = netdev_notifier_info_to_dev(ptr);
+ struct net *net = dev_net(dev);
+ struct clusterip_net *cn = clusterip_pernet(net);
struct clusterip_config *c;
- c = container_of(this, struct clusterip_config, notifier);
- switch (event) {
- case NETDEV_REGISTER:
- if (!strcmp(dev->name, c->ifname)) {
- c->ifindex = dev->ifindex;
- dev_mc_add(dev, c->clustermac);
- }
- break;
- case NETDEV_UNREGISTER:
- if (dev->ifindex == c->ifindex) {
- dev_mc_del(dev, c->clustermac);
- c->ifindex = -1;
- }
- break;
- case NETDEV_CHANGENAME:
- if (!strcmp(dev->name, c->ifname)) {
- c->ifindex = dev->ifindex;
- dev_mc_add(dev, c->clustermac);
- } else if (dev->ifindex == c->ifindex) {
- dev_mc_del(dev, c->clustermac);
- c->ifindex = -1;
+ spin_lock_bh(&cn->lock);
+ list_for_each_entry_rcu(c, &cn->configs, list) {
+ switch (event) {
+ case NETDEV_REGISTER:
+ if (!strcmp(dev->name, c->ifname)) {
+ c->ifindex = dev->ifindex;
+ dev_mc_add(dev, c->clustermac);
+ }
+ break;
+ case NETDEV_UNREGISTER:
+ if (dev->ifindex == c->ifindex) {
+ dev_mc_del(dev, c->clustermac);
+ c->ifindex = -1;
+ }
+ break;
+ case NETDEV_CHANGENAME:
+ if (!strcmp(dev->name, c->ifname)) {
+ c->ifindex = dev->ifindex;
+ dev_mc_add(dev, c->clustermac);
+ } else if (dev->ifindex == c->ifindex) {
+ dev_mc_del(dev, c->clustermac);
+ c->ifindex = -1;
+ }
+ break;
}
- break;
}
+ spin_unlock_bh(&cn->lock);
return NOTIFY_DONE;
}
@@ -215,30 +232,44 @@ static struct clusterip_config *
clusterip_config_init(struct net *net, const struct ipt_clusterip_tgt_info *i,
__be32 ip, const char *iniface)
{
- struct clusterip_net *cn = net_generic(net, clusterip_net_id);
+ struct clusterip_net *cn = clusterip_pernet(net);
struct clusterip_config *c;
+ struct net_device *dev;
int err;
+ if (iniface[0] == '\0') {
+ pr_info("Please specify an interface name\n");
+ return ERR_PTR(-EINVAL);
+ }
+
c = kzalloc(sizeof(*c), GFP_ATOMIC);
if (!c)
return ERR_PTR(-ENOMEM);
- strcpy(c->ifname, iniface);
- c->ifindex = -1;
- c->clusterip = ip;
+ dev = dev_get_by_name(net, iniface);
+ if (!dev) {
+ pr_info("no such interface %s\n", iniface);
+ kfree(c);
+ return ERR_PTR(-ENOENT);
+ }
+ c->ifindex = dev->ifindex;
+ strcpy(c->ifname, dev->name);
memcpy(&c->clustermac, &i->clustermac, ETH_ALEN);
+ dev_mc_add(dev, c->clustermac);
+ dev_put(dev);
+
+ c->clusterip = ip;
c->num_total_nodes = i->num_total_nodes;
clusterip_config_init_nodelist(c, i);
c->hash_mode = i->hash_mode;
c->hash_initval = i->hash_initval;
+ c->net = net;
refcount_set(&c->refcount, 1);
spin_lock_bh(&cn->lock);
if (__clusterip_config_find(net, ip)) {
- spin_unlock_bh(&cn->lock);
- kfree(c);
-
- return ERR_PTR(-EBUSY);
+ err = -EBUSY;
+ goto out_config_put;
}
list_add_rcu(&c->list, &cn->configs);
@@ -250,9 +281,11 @@ clusterip_config_init(struct net *net, const struct ipt_clusterip_tgt_info *i,
/* create proc dir entry */
sprintf(buffer, "%pI4", &ip);
+ mutex_lock(&cn->mutex);
c->pde = proc_create_data(buffer, 0600,
cn->procdir,
&clusterip_proc_fops, c);
+ mutex_unlock(&cn->mutex);
if (!c->pde) {
err = -ENOMEM;
goto err;
@@ -260,22 +293,17 @@ clusterip_config_init(struct net *net, const struct ipt_clusterip_tgt_info *i,
}
#endif
- c->notifier.notifier_call = clusterip_netdev_event;
- err = register_netdevice_notifier(&c->notifier);
- if (!err) {
- refcount_set(&c->entries, 1);
- return c;
- }
+ refcount_set(&c->entries, 1);
+ return c;
#ifdef CONFIG_PROC_FS
- proc_remove(c->pde);
err:
#endif
spin_lock_bh(&cn->lock);
list_del_rcu(&c->list);
+out_config_put:
spin_unlock_bh(&cn->lock);
clusterip_config_put(c);
-
return ERR_PTR(err);
}
@@ -475,34 +503,20 @@ static int clusterip_tg_check(const struct xt_tgchk_param *par)
&e->ip.dst.s_addr);
return -EINVAL;
} else {
- struct net_device *dev;
-
- if (e->ip.iniface[0] == '\0') {
- pr_info("Please specify an interface name\n");
- return -EINVAL;
- }
-
- dev = dev_get_by_name(par->net, e->ip.iniface);
- if (!dev) {
- pr_info("no such interface %s\n",
- e->ip.iniface);
- return -ENOENT;
- }
- dev_put(dev);
-
config = clusterip_config_init(par->net, cipinfo,
e->ip.dst.s_addr,
e->ip.iniface);
if (IS_ERR(config))
return PTR_ERR(config);
}
- }
+ } else if (memcmp(&config->clustermac, &cipinfo->clustermac, ETH_ALEN))
+ return -EINVAL;
ret = nf_ct_netns_get(par->net, par->family);
if (ret < 0) {
pr_info("cannot load conntrack support for proto=%u\n",
par->family);
- clusterip_config_entry_put(par->net, config);
+ clusterip_config_entry_put(config);
clusterip_config_put(config);
return ret;
}
@@ -524,7 +538,7 @@ static void clusterip_tg_destroy(const struct xt_tgdtor_param *par)
/* if no more entries are referencing the config, remove it
* from the list and destroy the proc entry */
- clusterip_config_entry_put(par->net, cipinfo->config);
+ clusterip_config_entry_put(cipinfo->config);
clusterip_config_put(cipinfo->config);
@@ -806,7 +820,7 @@ static const struct file_operations clusterip_proc_fops = {
static int clusterip_net_init(struct net *net)
{
- struct clusterip_net *cn = net_generic(net, clusterip_net_id);
+ struct clusterip_net *cn = clusterip_pernet(net);
int ret;
INIT_LIST_HEAD(&cn->configs);
@@ -824,6 +838,7 @@ static int clusterip_net_init(struct net *net)
pr_err("Unable to proc dir entry\n");
return -ENOMEM;
}
+ mutex_init(&cn->mutex);
#endif /* CONFIG_PROC_FS */
return 0;
@@ -831,13 +846,15 @@ static int clusterip_net_init(struct net *net)
static void clusterip_net_exit(struct net *net)
{
- struct clusterip_net *cn = net_generic(net, clusterip_net_id);
+ struct clusterip_net *cn = clusterip_pernet(net);
+
#ifdef CONFIG_PROC_FS
+ mutex_lock(&cn->mutex);
proc_remove(cn->procdir);
cn->procdir = NULL;
+ mutex_unlock(&cn->mutex);
#endif
nf_unregister_net_hook(net, &cip_arp_ops);
- WARN_ON_ONCE(!list_empty(&cn->configs));
}
static struct pernet_operations clusterip_net_ops = {
@@ -847,6 +864,10 @@ static struct pernet_operations clusterip_net_ops = {
.size = sizeof(struct clusterip_net),
};
+struct notifier_block cip_netdev_notifier = {
+ .notifier_call = clusterip_netdev_event
+};
+
static int __init clusterip_tg_init(void)
{
int ret;
@@ -859,11 +880,17 @@ static int __init clusterip_tg_init(void)
if (ret < 0)
goto cleanup_subsys;
+ ret = register_netdevice_notifier(&cip_netdev_notifier);
+ if (ret < 0)
+ goto unregister_target;
+
pr_info("ClusterIP Version %s loaded successfully\n",
CLUSTERIP_VERSION);
return 0;
+unregister_target:
+ xt_unregister_target(&clusterip_tg_reg);
cleanup_subsys:
unregister_pernet_subsys(&clusterip_net_ops);
return ret;
@@ -873,11 +900,12 @@ static void __exit clusterip_tg_exit(void)
{
pr_info("ClusterIP Version %s unloading\n", CLUSTERIP_VERSION);
+ unregister_netdevice_notifier(&cip_netdev_notifier);
xt_unregister_target(&clusterip_tg_reg);
unregister_pernet_subsys(&clusterip_net_ops);
- /* Wait for completion of call_rcu_bh()'s (clusterip_config_rcu_free) */
- rcu_barrier_bh();
+ /* Wait for completion of call_rcu()'s (clusterip_config_rcu_free) */
+ rcu_barrier();
}
module_init(clusterip_tg_init);
diff --git a/net/ipv4/netfilter/nf_nat_l3proto_ipv4.c b/net/ipv4/netfilter/nf_nat_l3proto_ipv4.c
index 78a67f961d86..2687db015b6f 100644
--- a/net/ipv4/netfilter/nf_nat_l3proto_ipv4.c
+++ b/net/ipv4/netfilter/nf_nat_l3proto_ipv4.c
@@ -62,22 +62,8 @@ static void nf_nat_ipv4_decode_session(struct sk_buff *skb,
}
#endif /* CONFIG_XFRM */
-static bool nf_nat_ipv4_in_range(const struct nf_conntrack_tuple *t,
- const struct nf_nat_range2 *range)
-{
- return ntohl(t->src.u3.ip) >= ntohl(range->min_addr.ip) &&
- ntohl(t->src.u3.ip) <= ntohl(range->max_addr.ip);
-}
-
-static u32 nf_nat_ipv4_secure_port(const struct nf_conntrack_tuple *t,
- __be16 dport)
-{
- return secure_ipv4_port_ephemeral(t->src.u3.ip, t->dst.u3.ip, dport);
-}
-
static bool nf_nat_ipv4_manip_pkt(struct sk_buff *skb,
unsigned int iphdroff,
- const struct nf_nat_l4proto *l4proto,
const struct nf_conntrack_tuple *target,
enum nf_nat_manip_type maniptype)
{
@@ -90,8 +76,8 @@ static bool nf_nat_ipv4_manip_pkt(struct sk_buff *skb,
iph = (void *)skb->data + iphdroff;
hdroff = iphdroff + iph->ihl * 4;
- if (!l4proto->manip_pkt(skb, &nf_nat_l3proto_ipv4, iphdroff, hdroff,
- target, maniptype))
+ if (!nf_nat_l4proto_manip_pkt(skb, &nf_nat_l3proto_ipv4, iphdroff,
+ hdroff, target, maniptype))
return false;
iph = (void *)skb->data + iphdroff;
@@ -161,8 +147,6 @@ static int nf_nat_ipv4_nlattr_to_range(struct nlattr *tb[],
static const struct nf_nat_l3proto nf_nat_l3proto_ipv4 = {
.l3proto = NFPROTO_IPV4,
- .in_range = nf_nat_ipv4_in_range,
- .secure_port = nf_nat_ipv4_secure_port,
.manip_pkt = nf_nat_ipv4_manip_pkt,
.csum_update = nf_nat_ipv4_csum_update,
.csum_recalc = nf_nat_ipv4_csum_recalc,
@@ -186,7 +170,6 @@ int nf_nat_icmp_reply_translation(struct sk_buff *skb,
enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo);
enum nf_nat_manip_type manip = HOOK2MANIP(hooknum);
unsigned int hdrlen = ip_hdrlen(skb);
- const struct nf_nat_l4proto *l4proto;
struct nf_conntrack_tuple target;
unsigned long statusbit;
@@ -217,9 +200,8 @@ int nf_nat_icmp_reply_translation(struct sk_buff *skb,
if (!(ct->status & statusbit))
return 1;
- l4proto = __nf_nat_l4proto_find(NFPROTO_IPV4, inside->ip.protocol);
if (!nf_nat_ipv4_manip_pkt(skb, hdrlen + sizeof(inside->icmp),
- l4proto, &ct->tuplehash[!dir].tuple, !manip))
+ &ct->tuplehash[!dir].tuple, !manip))
return 0;
if (skb->ip_summed != CHECKSUM_PARTIAL) {
@@ -233,8 +215,7 @@ int nf_nat_icmp_reply_translation(struct sk_buff *skb,
/* Change outer to look like the reply to an incoming packet */
nf_ct_invert_tuplepr(&target, &ct->tuplehash[!dir].tuple);
- l4proto = __nf_nat_l4proto_find(NFPROTO_IPV4, 0);
- if (!nf_nat_ipv4_manip_pkt(skb, 0, l4proto, &target, manip))
+ if (!nf_nat_ipv4_manip_pkt(skb, 0, &target, manip))
return 0;
return 1;
@@ -391,26 +372,12 @@ EXPORT_SYMBOL_GPL(nf_nat_l3proto_ipv4_unregister_fn);
static int __init nf_nat_l3proto_ipv4_init(void)
{
- int err;
-
- err = nf_nat_l4proto_register(NFPROTO_IPV4, &nf_nat_l4proto_icmp);
- if (err < 0)
- goto err1;
- err = nf_nat_l3proto_register(&nf_nat_l3proto_ipv4);
- if (err < 0)
- goto err2;
- return err;
-
-err2:
- nf_nat_l4proto_unregister(NFPROTO_IPV4, &nf_nat_l4proto_icmp);
-err1:
- return err;
+ return nf_nat_l3proto_register(&nf_nat_l3proto_ipv4);
}
static void __exit nf_nat_l3proto_ipv4_exit(void)
{
nf_nat_l3proto_unregister(&nf_nat_l3proto_ipv4);
- nf_nat_l4proto_unregister(NFPROTO_IPV4, &nf_nat_l4proto_icmp);
}
MODULE_LICENSE("GPL");
diff --git a/net/ipv4/netfilter/nf_nat_pptp.c b/net/ipv4/netfilter/nf_nat_pptp.c
index 5d259a12e25f..68b4d450391b 100644
--- a/net/ipv4/netfilter/nf_nat_pptp.c
+++ b/net/ipv4/netfilter/nf_nat_pptp.c
@@ -299,8 +299,6 @@ pptp_inbound_pkt(struct sk_buff *skb,
static int __init nf_nat_helper_pptp_init(void)
{
- nf_nat_need_gre();
-
BUG_ON(nf_nat_pptp_hook_outbound != NULL);
RCU_INIT_POINTER(nf_nat_pptp_hook_outbound, pptp_outbound_pkt);
diff --git a/net/ipv4/netfilter/nf_nat_proto_gre.c b/net/ipv4/netfilter/nf_nat_proto_gre.c
deleted file mode 100644
index 00fda6331ce5..000000000000
--- a/net/ipv4/netfilter/nf_nat_proto_gre.c
+++ /dev/null
@@ -1,150 +0,0 @@
-/*
- * nf_nat_proto_gre.c
- *
- * NAT protocol helper module for GRE.
- *
- * GRE is a generic encapsulation protocol, which is generally not very
- * suited for NAT, as it has no protocol-specific part as port numbers.
- *
- * It has an optional key field, which may help us distinguishing two
- * connections between the same two hosts.
- *
- * GRE is defined in RFC 1701 and RFC 1702, as well as RFC 2784
- *
- * PPTP is built on top of a modified version of GRE, and has a mandatory
- * field called "CallID", which serves us for the same purpose as the key
- * field in plain GRE.
- *
- * Documentation about PPTP can be found in RFC 2637
- *
- * (C) 2000-2005 by Harald Welte <laforge@gnumonks.org>
- *
- * Development of this code funded by Astaro AG (http://www.astaro.com/)
- *
- * (C) 2006-2012 Patrick McHardy <kaber@trash.net>
- *
- */
-
-#include <linux/module.h>
-#include <linux/skbuff.h>
-#include <linux/ip.h>
-
-#include <net/netfilter/nf_nat.h>
-#include <net/netfilter/nf_nat_l4proto.h>
-#include <linux/netfilter/nf_conntrack_proto_gre.h>
-
-MODULE_LICENSE("GPL");
-MODULE_AUTHOR("Harald Welte <laforge@gnumonks.org>");
-MODULE_DESCRIPTION("Netfilter NAT protocol helper module for GRE");
-
-/* generate unique tuple ... */
-static void
-gre_unique_tuple(const struct nf_nat_l3proto *l3proto,
- struct nf_conntrack_tuple *tuple,
- const struct nf_nat_range2 *range,
- enum nf_nat_manip_type maniptype,
- const struct nf_conn *ct)
-{
- static u_int16_t key;
- __be16 *keyptr;
- unsigned int min, i, range_size;
-
- /* If there is no master conntrack we are not PPTP,
- do not change tuples */
- if (!ct->master)
- return;
-
- if (maniptype == NF_NAT_MANIP_SRC)
- keyptr = &tuple->src.u.gre.key;
- else
- keyptr = &tuple->dst.u.gre.key;
-
- if (!(range->flags & NF_NAT_RANGE_PROTO_SPECIFIED)) {
- pr_debug("%p: NATing GRE PPTP\n", ct);
- min = 1;
- range_size = 0xffff;
- } else {
- min = ntohs(range->min_proto.gre.key);
- range_size = ntohs(range->max_proto.gre.key) - min + 1;
- }
-
- pr_debug("min = %u, range_size = %u\n", min, range_size);
-
- for (i = 0; ; ++key) {
- *keyptr = htons(min + key % range_size);
- if (++i == range_size || !nf_nat_used_tuple(tuple, ct))
- return;
- }
-
- pr_debug("%p: no NAT mapping\n", ct);
- return;
-}
-
-/* manipulate a GRE packet according to maniptype */
-static bool
-gre_manip_pkt(struct sk_buff *skb,
- const struct nf_nat_l3proto *l3proto,
- unsigned int iphdroff, unsigned int hdroff,
- const struct nf_conntrack_tuple *tuple,
- enum nf_nat_manip_type maniptype)
-{
- const struct gre_base_hdr *greh;
- struct pptp_gre_header *pgreh;
-
- /* pgreh includes two optional 32bit fields which are not required
- * to be there. That's where the magic '8' comes from */
- if (!skb_make_writable(skb, hdroff + sizeof(*pgreh) - 8))
- return false;
-
- greh = (void *)skb->data + hdroff;
- pgreh = (struct pptp_gre_header *)greh;
-
- /* we only have destination manip of a packet, since 'source key'
- * is not present in the packet itself */
- if (maniptype != NF_NAT_MANIP_DST)
- return true;
-
- switch (greh->flags & GRE_VERSION) {
- case GRE_VERSION_0:
- /* We do not currently NAT any GREv0 packets.
- * Try to behave like "nf_nat_proto_unknown" */
- break;
- case GRE_VERSION_1:
- pr_debug("call_id -> 0x%04x\n", ntohs(tuple->dst.u.gre.key));
- pgreh->call_id = tuple->dst.u.gre.key;
- break;
- default:
- pr_debug("can't nat unknown GRE version\n");
- return false;
- }
- return true;
-}
-
-static const struct nf_nat_l4proto gre = {
- .l4proto = IPPROTO_GRE,
- .manip_pkt = gre_manip_pkt,
- .in_range = nf_nat_l4proto_in_range,
- .unique_tuple = gre_unique_tuple,
-#if IS_ENABLED(CONFIG_NF_CT_NETLINK)
- .nlattr_to_range = nf_nat_l4proto_nlattr_to_range,
-#endif
-};
-
-static int __init nf_nat_proto_gre_init(void)
-{
- return nf_nat_l4proto_register(NFPROTO_IPV4, &gre);
-}
-
-static void __exit nf_nat_proto_gre_fini(void)
-{
- nf_nat_l4proto_unregister(NFPROTO_IPV4, &gre);
-}
-
-module_init(nf_nat_proto_gre_init);
-module_exit(nf_nat_proto_gre_fini);
-
-void nf_nat_need_gre(void)
-{
- return;
-}
-EXPORT_SYMBOL_GPL(nf_nat_need_gre);
diff --git a/net/ipv4/netfilter/nf_nat_proto_icmp.c b/net/ipv4/netfilter/nf_nat_proto_icmp.c
deleted file mode 100644
index 6d7cf1d79baf..000000000000
--- a/net/ipv4/netfilter/nf_nat_proto_icmp.c
+++ /dev/null
@@ -1,83 +0,0 @@
-/* (C) 1999-2001 Paul `Rusty' Russell
- * (C) 2002-2006 Netfilter Core Team <coreteam@netfilter.org>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- */
-
-#include <linux/types.h>
-#include <linux/init.h>
-#include <linux/export.h>
-#include <linux/ip.h>
-#include <linux/icmp.h>
-
-#include <linux/netfilter.h>
-#include <net/netfilter/nf_nat.h>
-#include <net/netfilter/nf_nat_core.h>
-#include <net/netfilter/nf_nat_l4proto.h>
-
-static bool
-icmp_in_range(const struct nf_conntrack_tuple *tuple,
- enum nf_nat_manip_type maniptype,
- const union nf_conntrack_man_proto *min,
- const union nf_conntrack_man_proto *max)
-{
- return ntohs(tuple->src.u.icmp.id) >= ntohs(min->icmp.id) &&
- ntohs(tuple->src.u.icmp.id) <= ntohs(max->icmp.id);
-}
-
-static void
-icmp_unique_tuple(const struct nf_nat_l3proto *l3proto,
- struct nf_conntrack_tuple *tuple,
- const struct nf_nat_range2 *range,
- enum nf_nat_manip_type maniptype,
- const struct nf_conn *ct)
-{
- static u_int16_t id;
- unsigned int range_size;
- unsigned int i;
-
- range_size = ntohs(range->max_proto.icmp.id) -
- ntohs(range->min_proto.icmp.id) + 1;
- /* If no range specified... */
- if (!(range->flags & NF_NAT_RANGE_PROTO_SPECIFIED))
- range_size = 0xFFFF;
-
- for (i = 0; ; ++id) {
- tuple->src.u.icmp.id = htons(ntohs(range->min_proto.icmp.id) +
- (id % range_size));
- if (++i == range_size || !nf_nat_used_tuple(tuple, ct))
- return;
- }
- return;
-}
-
-static bool
-icmp_manip_pkt(struct sk_buff *skb,
- const struct nf_nat_l3proto *l3proto,
- unsigned int iphdroff, unsigned int hdroff,
- const struct nf_conntrack_tuple *tuple,
- enum nf_nat_manip_type maniptype)
-{
- struct icmphdr *hdr;
-
- if (!skb_make_writable(skb, hdroff + sizeof(*hdr)))
- return false;
-
- hdr = (struct icmphdr *)(skb->data + hdroff);
- inet_proto_csum_replace2(&hdr->checksum, skb,
- hdr->un.echo.id, tuple->src.u.icmp.id, false);
- hdr->un.echo.id = tuple->src.u.icmp.id;
- return true;
-}
-
-const struct nf_nat_l4proto nf_nat_l4proto_icmp = {
- .l4proto = IPPROTO_ICMP,
- .manip_pkt = icmp_manip_pkt,
- .in_range = icmp_in_range,
- .unique_tuple = icmp_unique_tuple,
-#if IS_ENABLED(CONFIG_NF_CT_NETLINK)
- .nlattr_to_range = nf_nat_l4proto_nlattr_to_range,
-#endif
-};
diff --git a/net/ipv4/netfilter/nf_reject_ipv4.c b/net/ipv4/netfilter/nf_reject_ipv4.c
index 5cd06ba3535d..aa8304c618b8 100644
--- a/net/ipv4/netfilter/nf_reject_ipv4.c
+++ b/net/ipv4/netfilter/nf_reject_ipv4.c
@@ -102,6 +102,7 @@ EXPORT_SYMBOL_GPL(nf_reject_ip_tcphdr_put);
/* Send RST reply */
void nf_send_reset(struct net *net, struct sk_buff *oldskb, int hook)
{
+ struct net_device *br_indev __maybe_unused;
struct sk_buff *nskb;
struct iphdr *niph;
const struct tcphdr *oth;
@@ -147,10 +148,11 @@ void nf_send_reset(struct net *net, struct sk_buff *oldskb, int hook)
* build the eth header using the original destination's MAC as the
* source, and send the RST packet directly.
*/
- if (oldskb->nf_bridge) {
+ br_indev = nf_bridge_get_physindev(oldskb);
+ if (br_indev) {
struct ethhdr *oeth = eth_hdr(oldskb);
- nskb->dev = nf_bridge_get_physindev(oldskb);
+ nskb->dev = br_indev;
niph->tot_len = htons(nskb->len);
ip_send_check(niph);
if (dev_hard_header(nskb, nskb->dev, ntohs(nskb->protocol),
diff --git a/net/ipv4/proc.c b/net/ipv4/proc.c
index 70289682a670..c3610b37bb4c 100644
--- a/net/ipv4/proc.c
+++ b/net/ipv4/proc.c
@@ -219,6 +219,7 @@ static const struct snmp_mib snmp4_net_list[] = {
SNMP_MIB_ITEM("TCPRenoRecoveryFail", LINUX_MIB_TCPRENORECOVERYFAIL),
SNMP_MIB_ITEM("TCPSackRecoveryFail", LINUX_MIB_TCPSACKRECOVERYFAIL),
SNMP_MIB_ITEM("TCPRcvCollapsed", LINUX_MIB_TCPRCVCOLLAPSED),
+ SNMP_MIB_ITEM("TCPBacklogCoalesce", LINUX_MIB_TCPBACKLOGCOALESCE),
SNMP_MIB_ITEM("TCPDSACKOldSent", LINUX_MIB_TCPDSACKOLDSENT),
SNMP_MIB_ITEM("TCPDSACKOfoSent", LINUX_MIB_TCPDSACKOFOSENT),
SNMP_MIB_ITEM("TCPDSACKRecv", LINUX_MIB_TCPDSACKRECV),
diff --git a/net/ipv4/protocol.c b/net/ipv4/protocol.c
index 32a691b7ce2c..92d249e053be 100644
--- a/net/ipv4/protocol.c
+++ b/net/ipv4/protocol.c
@@ -29,6 +29,7 @@
#include <net/protocol.h>
struct net_protocol __rcu *inet_protos[MAX_INET_PROTOS] __read_mostly;
+EXPORT_SYMBOL(inet_protos);
const struct net_offload __rcu *inet_offloads[MAX_INET_PROTOS] __read_mostly;
EXPORT_SYMBOL(inet_offloads);
diff --git a/net/ipv4/raw.c b/net/ipv4/raw.c
index 169a652b3dd1..c55a5432cf37 100644
--- a/net/ipv4/raw.c
+++ b/net/ipv4/raw.c
@@ -131,8 +131,7 @@ struct sock *__raw_v4_lookup(struct net *net, struct sock *sk,
if (net_eq(sock_net(sk), net) && inet->inet_num == num &&
!(inet->inet_daddr && inet->inet_daddr != raddr) &&
!(inet->inet_rcv_saddr && inet->inet_rcv_saddr != laddr) &&
- !(sk->sk_bound_dev_if && sk->sk_bound_dev_if != dif &&
- sk->sk_bound_dev_if != sdif))
+ raw_sk_bound_dev_eq(net, sk->sk_bound_dev_if, dif, sdif))
goto found; /* gotcha */
}
sk = NULL;
@@ -805,7 +804,7 @@ out:
return copied;
}
-static int raw_init(struct sock *sk)
+static int raw_sk_init(struct sock *sk)
{
struct raw_sock *rp = raw_sk(sk);
@@ -970,7 +969,7 @@ struct proto raw_prot = {
.connect = ip4_datagram_connect,
.disconnect = __udp_disconnect,
.ioctl = raw_ioctl,
- .init = raw_init,
+ .init = raw_sk_init,
.setsockopt = raw_setsockopt,
.getsockopt = raw_getsockopt,
.sendmsg = raw_sendmsg,
@@ -1134,3 +1133,27 @@ void __init raw_proc_exit(void)
unregister_pernet_subsys(&raw_net_ops);
}
#endif /* CONFIG_PROC_FS */
+
+static void raw_sysctl_init_net(struct net *net)
+{
+#ifdef CONFIG_NET_L3_MASTER_DEV
+ net->ipv4.sysctl_raw_l3mdev_accept = 1;
+#endif
+}
+
+static int __net_init raw_sysctl_init(struct net *net)
+{
+ raw_sysctl_init_net(net);
+ return 0;
+}
+
+static struct pernet_operations __net_initdata raw_sysctl_ops = {
+ .init = raw_sysctl_init,
+};
+
+void __init raw_init(void)
+{
+ raw_sysctl_init_net(&init_net);
+ if (register_pernet_subsys(&raw_sysctl_ops))
+ panic("RAW: failed to init sysctl parameters.\n");
+}
diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index c0a9d26c06ce..ce92f73cf104 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -1677,7 +1677,7 @@ static void ip_handle_martian_source(struct net_device *dev,
print_hex_dump(KERN_WARNING, "ll header: ",
DUMP_PREFIX_OFFSET, 16, 1,
skb_mac_header(skb),
- dev->hard_header_len, true);
+ dev->hard_header_len, false);
}
}
#endif
@@ -2849,6 +2849,7 @@ static int inet_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh,
err = -rt->dst.error;
} else {
fl4.flowi4_iif = LOOPBACK_IFINDEX;
+ skb->dev = net->loopback_dev;
rt = ip_route_output_key_hash_rcu(net, &fl4, &res, skb);
err = 0;
if (IS_ERR(rt))
diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c
index 891ed2f91467..ba0fc4b18465 100644
--- a/net/ipv4/sysctl_net_ipv4.c
+++ b/net/ipv4/sysctl_net_ipv4.c
@@ -602,6 +602,17 @@ static struct ctl_table ipv4_net_table[] = {
.mode = 0644,
.proc_handler = ipv4_ping_group_range,
},
+#ifdef CONFIG_NET_L3_MASTER_DEV
+ {
+ .procname = "raw_l3mdev_accept",
+ .data = &init_net.ipv4.sysctl_raw_l3mdev_accept,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec_minmax,
+ .extra1 = &zero,
+ .extra2 = &one,
+ },
+#endif
{
.procname = "tcp_ecn",
.data = &init_net.ipv4.sysctl_tcp_ecn,
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index 9e6bc4d6daa7..27e2f6837062 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -1423,7 +1423,7 @@ do_error:
if (copied + copied_syn)
goto out;
out_err:
- sock_zerocopy_put_abort(uarg);
+ sock_zerocopy_put_abort(uarg, true);
err = sk_stream_error(sk, flags, err);
/* make sure we wake any epoll edge trigger waiter */
if (unlikely(skb_queue_len(&sk->sk_write_queue) == 0 &&
@@ -2088,7 +2088,7 @@ int tcp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, int nonblock,
}
continue;
- found_ok_skb:
+found_ok_skb:
/* Ok so how much can we use? */
used = skb->len - offset;
if (len < used)
@@ -2147,7 +2147,7 @@ skip_copy:
sk_eat_skb(sk, skb);
continue;
- found_fin_ok:
+found_fin_ok:
/* Process the FIN. */
++*seq;
if (!(flags & MSG_PEEK))
@@ -2241,10 +2241,6 @@ void tcp_set_state(struct sock *sk, int state)
* socket sitting in hash tables.
*/
inet_sk_state_store(sk, state);
-
-#ifdef STATE_TRACE
- SOCK_DEBUG(sk, "TCP sk=%p, State %s -> %s\n", sk, statename[oldstate], statename[state]);
-#endif
}
EXPORT_SYMBOL_GPL(tcp_set_state);
@@ -3246,6 +3242,7 @@ static size_t tcp_opt_stats_get_size(void)
nla_total_size_64bit(sizeof(u64)) + /* TCP_NLA_BYTES_RETRANS */
nla_total_size(sizeof(u32)) + /* TCP_NLA_DSACK_DUPS */
nla_total_size(sizeof(u32)) + /* TCP_NLA_REORD_SEEN */
+ nla_total_size(sizeof(u32)) + /* TCP_NLA_SRTT */
0;
}
@@ -3299,6 +3296,7 @@ struct sk_buff *tcp_get_timestamping_opt_stats(const struct sock *sk)
TCP_NLA_PAD);
nla_put_u32(stats, TCP_NLA_DSACK_DUPS, tp->dsack_dups);
nla_put_u32(stats, TCP_NLA_REORD_SEEN, tp->reord_seen);
+ nla_put_u32(stats, TCP_NLA_SRTT, tp->srtt_us >> 3);
return stats;
}
@@ -3658,8 +3656,11 @@ bool tcp_alloc_md5sig_pool(void)
if (unlikely(!tcp_md5sig_pool_populated)) {
mutex_lock(&tcp_md5sig_mutex);
- if (!tcp_md5sig_pool_populated)
+ if (!tcp_md5sig_pool_populated) {
__tcp_alloc_md5sig_pool();
+ if (tcp_md5sig_pool_populated)
+ static_key_slow_inc(&tcp_md5_needed);
+ }
mutex_unlock(&tcp_md5sig_mutex);
}
diff --git a/net/ipv4/tcp_bbr.c b/net/ipv4/tcp_bbr.c
index 9277abdd822a..0f497fc49c3f 100644
--- a/net/ipv4/tcp_bbr.c
+++ b/net/ipv4/tcp_bbr.c
@@ -128,7 +128,12 @@ static const u32 bbr_probe_rtt_mode_ms = 200;
/* Skip TSO below the following bandwidth (bits/sec): */
static const int bbr_min_tso_rate = 1200000;
-/* Pace at ~1% below estimated bw, on average, to reduce queue at bottleneck. */
+/* Pace at ~1% below estimated bw, on average, to reduce queue at bottleneck.
+ * In order to help drive the network toward lower queues and low latency while
+ * maintaining high utilization, the average pacing rate aims to be slightly
+ * lower than the estimated bandwidth. This is an important aspect of the
+ * design.
+ */
static const int bbr_pacing_margin_percent = 1;
/* We use a high_gain value of 2/ln(2) because it's the smallest pacing gain
@@ -247,13 +252,7 @@ static void bbr_init_pacing_rate_from_rtt(struct sock *sk)
sk->sk_pacing_rate = bbr_bw_to_pacing_rate(sk, bw, bbr_high_gain);
}
-/* Pace using current bw estimate and a gain factor. In order to help drive the
- * network toward lower queues while maintaining high utilization and low
- * latency, the average pacing rate aims to be slightly (~1%) lower than the
- * estimated bandwidth. This is an important aspect of the design. In this
- * implementation this slightly lower pacing rate is achieved implicitly by not
- * including link-layer headers in the packet size used for the pacing rate.
- */
+/* Pace using current bw estimate and a gain factor. */
static void bbr_set_pacing_rate(struct sock *sk, u32 bw, int gain)
{
struct tcp_sock *tp = tcp_sk(sk);
diff --git a/net/ipv4/tcp_bpf.c b/net/ipv4/tcp_bpf.c
index 3b45fe530f91..1bb7321a256d 100644
--- a/net/ipv4/tcp_bpf.c
+++ b/net/ipv4/tcp_bpf.c
@@ -8,6 +8,7 @@
#include <linux/wait.h>
#include <net/inet_common.h>
+#include <net/tls.h>
static bool tcp_bpf_stream_read(const struct sock *sk)
{
@@ -198,7 +199,7 @@ static int bpf_tcp_ingress(struct sock *sk, struct sk_psock *psock,
msg->sg.start = i;
msg->sg.size -= apply_bytes;
sk_psock_queue_msg(psock, tmp);
- sk->sk_data_ready(sk);
+ sk_psock_data_ready(sk, psock);
} else {
sk_msg_free(sk, tmp);
kfree(tmp);
@@ -218,6 +219,8 @@ static int tcp_bpf_push(struct sock *sk, struct sk_msg *msg, u32 apply_bytes,
u32 off;
while (1) {
+ bool has_tx_ulp;
+
sge = sk_msg_elem(msg, msg->sg.start);
size = (apply && apply_bytes < sge->length) ?
apply_bytes : sge->length;
@@ -226,7 +229,15 @@ static int tcp_bpf_push(struct sock *sk, struct sk_msg *msg, u32 apply_bytes,
tcp_rate_check_app_limited(sk);
retry:
- ret = do_tcp_sendpages(sk, page, off, size, flags);
+ has_tx_ulp = tls_sw_has_ctx_tx(sk);
+ if (has_tx_ulp) {
+ flags |= MSG_SENDPAGE_NOPOLICY;
+ ret = kernel_sendpage_locked(sk,
+ page, off, size, flags);
+ } else {
+ ret = do_tcp_sendpages(sk, page, off, size, flags);
+ }
+
if (ret <= 0)
return ret;
if (apply)
@@ -289,12 +300,23 @@ static int tcp_bpf_send_verdict(struct sock *sk, struct sk_psock *psock,
{
bool cork = false, enospc = msg->sg.start == msg->sg.end;
struct sock *sk_redir;
- u32 tosend;
+ u32 tosend, delta = 0;
int ret;
more_data:
- if (psock->eval == __SK_NONE)
+ if (psock->eval == __SK_NONE) {
+ /* Track delta in msg size to add/subtract it on SK_DROP from
+ * returned to user copied size. This ensures user doesn't
+ * get a positive return code with msg_cut_data and SK_DROP
+ * verdict.
+ */
+ delta = msg->sg.size;
psock->eval = sk_psock_msg_verdict(sk, psock, msg);
+ if (msg->sg.size < delta)
+ delta -= msg->sg.size;
+ else
+ delta = 0;
+ }
if (msg->cork_bytes &&
msg->cork_bytes > msg->sg.size && !enospc) {
@@ -350,7 +372,7 @@ more_data:
default:
sk_msg_free_partial(sk, msg, tosend);
sk_msg_apply_bytes(psock, tosend);
- *copied -= tosend;
+ *copied -= (tosend + delta);
return -EACCES;
}
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index a9d9555a973f..76858b14ebe9 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -1865,16 +1865,20 @@ static void tcp_check_reno_reordering(struct sock *sk, const int addend)
/* Emulate SACKs for SACKless connection: account for a new dupack. */
-static void tcp_add_reno_sack(struct sock *sk)
+static void tcp_add_reno_sack(struct sock *sk, int num_dupack)
{
- struct tcp_sock *tp = tcp_sk(sk);
- u32 prior_sacked = tp->sacked_out;
+ if (num_dupack) {
+ struct tcp_sock *tp = tcp_sk(sk);
+ u32 prior_sacked = tp->sacked_out;
+ s32 delivered;
- tp->sacked_out++;
- tcp_check_reno_reordering(sk, 0);
- if (tp->sacked_out > prior_sacked)
- tp->delivered++; /* Some out-of-order packet is delivered */
- tcp_verify_left_out(tp);
+ tp->sacked_out += num_dupack;
+ tcp_check_reno_reordering(sk, 0);
+ delivered = tp->sacked_out - prior_sacked;
+ if (delivered > 0)
+ tp->delivered += delivered;
+ tcp_verify_left_out(tp);
+ }
}
/* Account for ACK, ACKing some data in Reno Recovery phase. */
@@ -2459,8 +2463,8 @@ void tcp_cwnd_reduction(struct sock *sk, int newly_acked_sacked, int flag)
u64 dividend = (u64)tp->snd_ssthresh * tp->prr_delivered +
tp->prior_cwnd - 1;
sndcnt = div_u64(dividend, tp->prior_cwnd) - tp->prr_out;
- } else if ((flag & FLAG_RETRANS_DATA_ACKED) &&
- !(flag & FLAG_LOST_RETRANS)) {
+ } else if ((flag & (FLAG_RETRANS_DATA_ACKED | FLAG_LOST_RETRANS)) ==
+ FLAG_RETRANS_DATA_ACKED) {
sndcnt = min_t(int, delta,
max_t(int, tp->prr_delivered - tp->prr_out,
newly_acked_sacked) + 1);
@@ -2636,7 +2640,7 @@ void tcp_enter_recovery(struct sock *sk, bool ece_ack)
/* Process an ACK in CA_Loss state. Move to CA_Open if lost data are
* recovered or spurious. Otherwise retransmits more on partial ACKs.
*/
-static void tcp_process_loss(struct sock *sk, int flag, bool is_dupack,
+static void tcp_process_loss(struct sock *sk, int flag, int num_dupack,
int *rexmit)
{
struct tcp_sock *tp = tcp_sk(sk);
@@ -2655,7 +2659,7 @@ static void tcp_process_loss(struct sock *sk, int flag, bool is_dupack,
return;
if (after(tp->snd_nxt, tp->high_seq)) {
- if (flag & FLAG_DATA_SACKED || is_dupack)
+ if (flag & FLAG_DATA_SACKED || num_dupack)
tp->frto = 0; /* Step 3.a. loss was real */
} else if (flag & FLAG_SND_UNA_ADVANCED && !recovered) {
tp->high_seq = tp->snd_nxt;
@@ -2681,8 +2685,8 @@ static void tcp_process_loss(struct sock *sk, int flag, bool is_dupack,
/* A Reno DUPACK means new data in F-RTO step 2.b above are
* delivered. Lower inflight to clock out (re)tranmissions.
*/
- if (after(tp->snd_nxt, tp->high_seq) && is_dupack)
- tcp_add_reno_sack(sk);
+ if (after(tp->snd_nxt, tp->high_seq) && num_dupack)
+ tcp_add_reno_sack(sk, num_dupack);
else if (flag & FLAG_SND_UNA_ADVANCED)
tcp_reset_reno_sack(tp);
}
@@ -2759,13 +2763,13 @@ static bool tcp_force_fast_retransmit(struct sock *sk)
* tcp_xmit_retransmit_queue().
*/
static void tcp_fastretrans_alert(struct sock *sk, const u32 prior_snd_una,
- bool is_dupack, int *ack_flag, int *rexmit)
+ int num_dupack, int *ack_flag, int *rexmit)
{
struct inet_connection_sock *icsk = inet_csk(sk);
struct tcp_sock *tp = tcp_sk(sk);
int fast_rexmit = 0, flag = *ack_flag;
- bool do_lost = is_dupack || ((flag & FLAG_DATA_SACKED) &&
- tcp_force_fast_retransmit(sk));
+ bool do_lost = num_dupack || ((flag & FLAG_DATA_SACKED) &&
+ tcp_force_fast_retransmit(sk));
if (!tp->packets_out && tp->sacked_out)
tp->sacked_out = 0;
@@ -2812,8 +2816,8 @@ static void tcp_fastretrans_alert(struct sock *sk, const u32 prior_snd_una,
switch (icsk->icsk_ca_state) {
case TCP_CA_Recovery:
if (!(flag & FLAG_SND_UNA_ADVANCED)) {
- if (tcp_is_reno(tp) && is_dupack)
- tcp_add_reno_sack(sk);
+ if (tcp_is_reno(tp))
+ tcp_add_reno_sack(sk, num_dupack);
} else {
if (tcp_try_undo_partial(sk, prior_snd_una))
return;
@@ -2828,7 +2832,7 @@ static void tcp_fastretrans_alert(struct sock *sk, const u32 prior_snd_una,
tcp_identify_packet_loss(sk, ack_flag);
break;
case TCP_CA_Loss:
- tcp_process_loss(sk, flag, is_dupack, rexmit);
+ tcp_process_loss(sk, flag, num_dupack, rexmit);
tcp_identify_packet_loss(sk, ack_flag);
if (!(icsk->icsk_ca_state == TCP_CA_Open ||
(*ack_flag & FLAG_LOST_RETRANS)))
@@ -2839,8 +2843,7 @@ static void tcp_fastretrans_alert(struct sock *sk, const u32 prior_snd_una,
if (tcp_is_reno(tp)) {
if (flag & FLAG_SND_UNA_ADVANCED)
tcp_reset_reno_sack(tp);
- if (is_dupack)
- tcp_add_reno_sack(sk);
+ tcp_add_reno_sack(sk, num_dupack);
}
if (icsk->icsk_ca_state <= TCP_CA_Disorder)
@@ -3562,7 +3565,7 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag)
bool is_sack_reneg = tp->is_sack_reneg;
u32 ack_seq = TCP_SKB_CB(skb)->seq;
u32 ack = TCP_SKB_CB(skb)->ack_seq;
- bool is_dupack = false;
+ int num_dupack = 0;
int prior_packets = tp->packets_out;
u32 delivered = tp->delivered;
u32 lost = tp->lost;
@@ -3614,7 +3617,8 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag)
if (flag & FLAG_UPDATE_TS_RECENT)
tcp_replace_ts_recent(tp, TCP_SKB_CB(skb)->seq);
- if (!(flag & FLAG_SLOWPATH) && after(ack, prior_snd_una)) {
+ if ((flag & (FLAG_SLOWPATH | FLAG_SND_UNA_ADVANCED)) ==
+ FLAG_SND_UNA_ADVANCED) {
/* Window is constant, pure forward advance.
* No more checks are required.
* Note, we use the fact that SND.UNA>=SND.WL2.
@@ -3672,8 +3676,13 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag)
tcp_set_xmit_timer(sk);
if (tcp_ack_is_dubious(sk, flag)) {
- is_dupack = !(flag & (FLAG_SND_UNA_ADVANCED | FLAG_NOT_DUP));
- tcp_fastretrans_alert(sk, prior_snd_una, is_dupack, &flag,
+ if (!(flag & (FLAG_SND_UNA_ADVANCED | FLAG_NOT_DUP))) {
+ num_dupack = 1;
+ /* Consider if pure acks were aggregated in tcp_add_backlog() */
+ if (!(flag & FLAG_DATA))
+ num_dupack = max_t(u16, 1, skb_shinfo(skb)->gso_segs);
+ }
+ tcp_fastretrans_alert(sk, prior_snd_una, num_dupack, &flag,
&rexmit);
}
@@ -3691,7 +3700,7 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag)
no_queue:
/* If data was DSACKed, see if we can undo a cwnd reduction. */
if (flag & FLAG_DSACKING_ACK) {
- tcp_fastretrans_alert(sk, prior_snd_una, is_dupack, &flag,
+ tcp_fastretrans_alert(sk, prior_snd_una, num_dupack, &flag,
&rexmit);
tcp_newly_delivered(sk, delivered, flag);
}
@@ -3716,7 +3725,7 @@ old_ack:
if (TCP_SKB_CB(skb)->sacked) {
flag |= tcp_sacktag_write_queue(sk, skb, prior_snd_una,
&sack_state);
- tcp_fastretrans_alert(sk, prior_snd_una, is_dupack, &flag,
+ tcp_fastretrans_alert(sk, prior_snd_una, num_dupack, &flag,
&rexmit);
tcp_newly_delivered(sk, delivered, flag);
tcp_xmit_recovery(sk, rexmit);
@@ -4606,13 +4615,12 @@ end:
}
}
-static int __must_check tcp_queue_rcv(struct sock *sk, struct sk_buff *skb, int hdrlen,
- bool *fragstolen)
+static int __must_check tcp_queue_rcv(struct sock *sk, struct sk_buff *skb,
+ bool *fragstolen)
{
int eaten;
struct sk_buff *tail = skb_peek_tail(&sk->sk_receive_queue);
- __skb_pull(skb, hdrlen);
eaten = (tail &&
tcp_try_coalesce(sk, tail,
skb, fragstolen)) ? 1 : 0;
@@ -4663,7 +4671,7 @@ int tcp_send_rcvq(struct sock *sk, struct msghdr *msg, size_t size)
TCP_SKB_CB(skb)->end_seq = TCP_SKB_CB(skb)->seq + size;
TCP_SKB_CB(skb)->ack_seq = tcp_sk(sk)->snd_una - 1;
- if (tcp_queue_rcv(sk, skb, 0, &fragstolen)) {
+ if (tcp_queue_rcv(sk, skb, &fragstolen)) {
WARN_ON_ONCE(fragstolen); /* should not happen */
__kfree_skb(skb);
}
@@ -4723,7 +4731,7 @@ queue_and_out:
goto drop;
}
- eaten = tcp_queue_rcv(sk, skb, 0, &fragstolen);
+ eaten = tcp_queue_rcv(sk, skb, &fragstolen);
if (skb->len)
tcp_event_data_recv(sk, skb);
if (TCP_SKB_CB(skb)->tcp_flags & TCPHDR_FIN)
@@ -5599,8 +5607,8 @@ void tcp_rcv_established(struct sock *sk, struct sk_buff *skb)
NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPHPHITS);
/* Bulk data transfer: receiver */
- eaten = tcp_queue_rcv(sk, skb, tcp_header_len,
- &fragstolen);
+ __skb_pull(skb, tcp_header_len);
+ eaten = tcp_queue_rcv(sk, skb, &fragstolen);
tcp_event_data_recv(sk, skb);
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index de47038afdf0..efc6fef692ff 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -423,7 +423,7 @@ EXPORT_SYMBOL(tcp_req_err);
*
*/
-void tcp_v4_err(struct sk_buff *icmp_skb, u32 info)
+int tcp_v4_err(struct sk_buff *icmp_skb, u32 info)
{
const struct iphdr *iph = (const struct iphdr *)icmp_skb->data;
struct tcphdr *th = (struct tcphdr *)(icmp_skb->data + (iph->ihl << 2));
@@ -446,20 +446,21 @@ void tcp_v4_err(struct sk_buff *icmp_skb, u32 info)
inet_iif(icmp_skb), 0);
if (!sk) {
__ICMP_INC_STATS(net, ICMP_MIB_INERRORS);
- return;
+ return -ENOENT;
}
if (sk->sk_state == TCP_TIME_WAIT) {
inet_twsk_put(inet_twsk(sk));
- return;
+ return 0;
}
seq = ntohl(th->seq);
- if (sk->sk_state == TCP_NEW_SYN_RECV)
- return tcp_req_err(sk, seq,
- type == ICMP_PARAMETERPROB ||
- type == ICMP_TIME_EXCEEDED ||
- (type == ICMP_DEST_UNREACH &&
- (code == ICMP_NET_UNREACH ||
- code == ICMP_HOST_UNREACH)));
+ if (sk->sk_state == TCP_NEW_SYN_RECV) {
+ tcp_req_err(sk, seq, type == ICMP_PARAMETERPROB ||
+ type == ICMP_TIME_EXCEEDED ||
+ (type == ICMP_DEST_UNREACH &&
+ (code == ICMP_NET_UNREACH ||
+ code == ICMP_HOST_UNREACH)));
+ return 0;
+ }
bh_lock_sock(sk);
/* If too many ICMPs get dropped on busy
@@ -541,7 +542,6 @@ void tcp_v4_err(struct sk_buff *icmp_skb, u32 info)
icsk->icsk_rto = inet_csk_rto_backoff(icsk, TCP_RTO_MAX);
skb = tcp_rtx_queue_head(sk);
- BUG_ON(!skb);
tcp_mstamp_refresh(tp);
delta_us = (u32)(tp->tcp_mstamp - tcp_skb_timestamp_us(skb));
@@ -613,6 +613,7 @@ void tcp_v4_err(struct sk_buff *icmp_skb, u32 info)
out:
bh_unlock_sock(sk);
sock_put(sk);
+ return 0;
}
void __tcp_v4_send_check(struct sk_buff *skb, __be32 saddr, __be32 daddr)
@@ -969,10 +970,13 @@ static void tcp_v4_reqsk_destructor(struct request_sock *req)
* We need to maintain these in the sk structure.
*/
+struct static_key tcp_md5_needed __read_mostly;
+EXPORT_SYMBOL(tcp_md5_needed);
+
/* Find the Key structure for an address. */
-struct tcp_md5sig_key *tcp_md5_do_lookup(const struct sock *sk,
- const union tcp_md5_addr *addr,
- int family)
+struct tcp_md5sig_key *__tcp_md5_do_lookup(const struct sock *sk,
+ const union tcp_md5_addr *addr,
+ int family)
{
const struct tcp_sock *tp = tcp_sk(sk);
struct tcp_md5sig_key *key;
@@ -1010,7 +1014,7 @@ struct tcp_md5sig_key *tcp_md5_do_lookup(const struct sock *sk,
}
return best_match;
}
-EXPORT_SYMBOL(tcp_md5_do_lookup);
+EXPORT_SYMBOL(__tcp_md5_do_lookup);
static struct tcp_md5sig_key *tcp_md5_do_lookup_exact(const struct sock *sk,
const union tcp_md5_addr *addr,
@@ -1618,12 +1622,14 @@ int tcp_v4_early_demux(struct sk_buff *skb)
bool tcp_add_backlog(struct sock *sk, struct sk_buff *skb)
{
u32 limit = sk->sk_rcvbuf + sk->sk_sndbuf;
-
- /* Only socket owner can try to collapse/prune rx queues
- * to reduce memory overhead, so add a little headroom here.
- * Few sockets backlog are possibly concurrently non empty.
- */
- limit += 64*1024;
+ struct skb_shared_info *shinfo;
+ const struct tcphdr *th;
+ struct tcphdr *thtail;
+ struct sk_buff *tail;
+ unsigned int hdrlen;
+ bool fragstolen;
+ u32 gso_segs;
+ int delta;
/* In case all data was pulled from skb frags (in __pskb_pull_tail()),
* we can fix skb->truesize to its real value to avoid future drops.
@@ -1633,6 +1639,86 @@ bool tcp_add_backlog(struct sock *sk, struct sk_buff *skb)
*/
skb_condense(skb);
+ skb_dst_drop(skb);
+
+ if (unlikely(tcp_checksum_complete(skb))) {
+ bh_unlock_sock(sk);
+ __TCP_INC_STATS(sock_net(sk), TCP_MIB_CSUMERRORS);
+ __TCP_INC_STATS(sock_net(sk), TCP_MIB_INERRS);
+ return true;
+ }
+
+ /* Attempt coalescing to last skb in backlog, even if we are
+ * above the limits.
+ * This is okay because skb capacity is limited to MAX_SKB_FRAGS.
+ */
+ th = (const struct tcphdr *)skb->data;
+ hdrlen = th->doff * 4;
+ shinfo = skb_shinfo(skb);
+
+ if (!shinfo->gso_size)
+ shinfo->gso_size = skb->len - hdrlen;
+
+ if (!shinfo->gso_segs)
+ shinfo->gso_segs = 1;
+
+ tail = sk->sk_backlog.tail;
+ if (!tail)
+ goto no_coalesce;
+ thtail = (struct tcphdr *)tail->data;
+
+ if (TCP_SKB_CB(tail)->end_seq != TCP_SKB_CB(skb)->seq ||
+ TCP_SKB_CB(tail)->ip_dsfield != TCP_SKB_CB(skb)->ip_dsfield ||
+ ((TCP_SKB_CB(tail)->tcp_flags |
+ TCP_SKB_CB(skb)->tcp_flags) & TCPHDR_URG) ||
+ ((TCP_SKB_CB(tail)->tcp_flags ^
+ TCP_SKB_CB(skb)->tcp_flags) & (TCPHDR_ECE | TCPHDR_CWR)) ||
+#ifdef CONFIG_TLS_DEVICE
+ tail->decrypted != skb->decrypted ||
+#endif
+ thtail->doff != th->doff ||
+ memcmp(thtail + 1, th + 1, hdrlen - sizeof(*th)))
+ goto no_coalesce;
+
+ __skb_pull(skb, hdrlen);
+ if (skb_try_coalesce(tail, skb, &fragstolen, &delta)) {
+ thtail->window = th->window;
+
+ TCP_SKB_CB(tail)->end_seq = TCP_SKB_CB(skb)->end_seq;
+
+ if (after(TCP_SKB_CB(skb)->ack_seq, TCP_SKB_CB(tail)->ack_seq))
+ TCP_SKB_CB(tail)->ack_seq = TCP_SKB_CB(skb)->ack_seq;
+
+ TCP_SKB_CB(tail)->tcp_flags |= TCP_SKB_CB(skb)->tcp_flags;
+
+ if (TCP_SKB_CB(skb)->has_rxtstamp) {
+ TCP_SKB_CB(tail)->has_rxtstamp = true;
+ tail->tstamp = skb->tstamp;
+ skb_hwtstamps(tail)->hwtstamp = skb_hwtstamps(skb)->hwtstamp;
+ }
+
+ /* Not as strict as GRO. We only need to carry mss max value */
+ skb_shinfo(tail)->gso_size = max(shinfo->gso_size,
+ skb_shinfo(tail)->gso_size);
+
+ gso_segs = skb_shinfo(tail)->gso_segs + shinfo->gso_segs;
+ skb_shinfo(tail)->gso_segs = min_t(u32, gso_segs, 0xFFFF);
+
+ sk->sk_backlog.len += delta;
+ __NET_INC_STATS(sock_net(sk),
+ LINUX_MIB_TCPBACKLOGCOALESCE);
+ kfree_skb_partial(skb, fragstolen);
+ return false;
+ }
+ __skb_push(skb, hdrlen);
+
+no_coalesce:
+ /* Only socket owner can try to collapse/prune rx queues
+ * to reduce memory overhead, so add a little headroom here.
+ * Few sockets backlog are possibly concurrently non empty.
+ */
+ limit += 64*1024;
+
if (unlikely(sk_add_backlog(sk, skb, limit))) {
bh_unlock_sock(sk);
__NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPBACKLOGDROP);
@@ -2573,8 +2659,8 @@ static int __net_init tcp_sk_init(struct net *net)
* which are too large can cause TCP streams to be bursty.
*/
net->ipv4.sysctl_tcp_tso_win_divisor = 3;
- /* Default TSQ limit of four TSO segments */
- net->ipv4.sysctl_tcp_limit_output_bytes = 262144;
+ /* Default TSQ limit of 16 TSO segments */
+ net->ipv4.sysctl_tcp_limit_output_bytes = 16 * 65536;
/* rfc5961 challenge ack rate limiting */
net->ipv4.sysctl_tcp_challenge_ack_limit = 1000;
net->ipv4.sysctl_tcp_min_tso_segs = 2;
diff --git a/net/ipv4/tcp_offload.c b/net/ipv4/tcp_offload.c
index 870b0a335061..0fbf7d4df9da 100644
--- a/net/ipv4/tcp_offload.c
+++ b/net/ipv4/tcp_offload.c
@@ -10,6 +10,7 @@
* TCPv4 GSO/GRO support
*/
+#include <linux/indirect_call_wrapper.h>
#include <linux/skbuff.h>
#include <net/tcp.h>
#include <net/protocol.h>
@@ -305,7 +306,8 @@ int tcp_gro_complete(struct sk_buff *skb)
}
EXPORT_SYMBOL(tcp_gro_complete);
-static struct sk_buff *tcp4_gro_receive(struct list_head *head, struct sk_buff *skb)
+INDIRECT_CALLABLE_SCOPE
+struct sk_buff *tcp4_gro_receive(struct list_head *head, struct sk_buff *skb)
{
/* Don't bother verifying checksum if we're going to flush anyway. */
if (!NAPI_GRO_CB(skb)->flush &&
@@ -318,7 +320,7 @@ static struct sk_buff *tcp4_gro_receive(struct list_head *head, struct sk_buff *
return tcp_gro_receive(head, skb);
}
-static int tcp4_gro_complete(struct sk_buff *skb, int thoff)
+INDIRECT_CALLABLE_SCOPE int tcp4_gro_complete(struct sk_buff *skb, int thoff)
{
const struct iphdr *iph = ip_hdr(skb);
struct tcphdr *th = tcp_hdr(skb);
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index d1676d8a6ed7..730bc44dbad9 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -233,16 +233,14 @@ void tcp_select_initial_window(const struct sock *sk, int __space, __u32 mss,
if (init_rcv_wnd)
*rcv_wnd = min(*rcv_wnd, init_rcv_wnd * mss);
- (*rcv_wscale) = 0;
+ *rcv_wscale = 0;
if (wscale_ok) {
/* Set window scaling on max possible window */
space = max_t(u32, space, sock_net(sk)->ipv4.sysctl_tcp_rmem[2]);
space = max_t(u32, space, sysctl_rmem_max);
space = min_t(u32, space, *window_clamp);
- while (space > U16_MAX && (*rcv_wscale) < TCP_MAX_WSCALE) {
- space >>= 1;
- (*rcv_wscale)++;
- }
+ *rcv_wscale = clamp_t(int, ilog2(space) - 15,
+ 0, TCP_MAX_WSCALE);
}
/* Set the clamp no higher than max representable value */
(*window_clamp) = min_t(__u32, U16_MAX << (*rcv_wscale), *window_clamp);
@@ -596,7 +594,8 @@ static unsigned int tcp_syn_options(struct sock *sk, struct sk_buff *skb,
*md5 = NULL;
#ifdef CONFIG_TCP_MD5SIG
- if (unlikely(rcu_access_pointer(tp->md5sig_info))) {
+ if (static_key_false(&tcp_md5_needed) &&
+ rcu_access_pointer(tp->md5sig_info)) {
*md5 = tp->af_specific->md5_lookup(sk, sk);
if (*md5) {
opts->options |= OPTION_MD5;
@@ -732,7 +731,8 @@ static unsigned int tcp_established_options(struct sock *sk, struct sk_buff *skb
*md5 = NULL;
#ifdef CONFIG_TCP_MD5SIG
- if (unlikely(rcu_access_pointer(tp->md5sig_info))) {
+ if (static_key_false(&tcp_md5_needed) &&
+ rcu_access_pointer(tp->md5sig_info)) {
*md5 = tp->af_specific->md5_lookup(sk, sk);
if (*md5) {
opts->options |= OPTION_MD5;
@@ -1909,18 +1909,22 @@ static bool tcp_tso_should_defer(struct sock *sk, struct sk_buff *skb,
u32 max_segs)
{
const struct inet_connection_sock *icsk = inet_csk(sk);
- u32 age, send_win, cong_win, limit, in_flight;
+ u32 send_win, cong_win, limit, in_flight;
struct tcp_sock *tp = tcp_sk(sk);
struct sk_buff *head;
int win_divisor;
+ s64 delta;
if (icsk->icsk_ca_state >= TCP_CA_Recovery)
goto send_now;
/* Avoid bursty behavior by allowing defer
- * only if the last write was recent.
+ * only if the last write was recent (1 ms).
+ * Note that tp->tcp_wstamp_ns can be in the future if we have
+ * packets waiting in a qdisc or device for EDT delivery.
*/
- if ((s32)(tcp_jiffies32 - tp->lsndtime) > 0)
+ delta = tp->tcp_clock_cache - tp->tcp_wstamp_ns - NSEC_PER_MSEC;
+ if (delta > 0)
goto send_now;
in_flight = tcp_packets_in_flight(tp);
@@ -1967,9 +1971,9 @@ static bool tcp_tso_should_defer(struct sock *sk, struct sk_buff *skb,
head = tcp_rtx_queue_head(sk);
if (!head)
goto send_now;
- age = tcp_stamp_us_delta(tp->tcp_mstamp, tcp_skb_timestamp_us(head));
+ delta = tp->tcp_clock_cache - head->tstamp;
/* If next ACK is likely to come too late (half srtt), do not defer */
- if (age < (tp->srtt_us >> 4))
+ if ((s64)(delta - (u64)NSEC_PER_USEC * (tp->srtt_us >> 4)) < 0)
goto send_now;
/* Ok, it looks like it is advisable to defer.
@@ -1991,7 +1995,8 @@ static bool tcp_tso_should_defer(struct sock *sk, struct sk_buff *skb,
}
/* If this packet won't get more data, do not wait. */
- if (TCP_SKB_CB(skb)->tcp_flags & TCPHDR_FIN)
+ if ((TCP_SKB_CB(skb)->tcp_flags & TCPHDR_FIN) ||
+ TCP_SKB_CB(skb)->eor)
goto send_now;
return true;
@@ -2228,8 +2233,9 @@ static bool tcp_small_queue_check(struct sock *sk, const struct sk_buff *skb,
limit = max_t(unsigned long,
2 * skb->truesize,
sk->sk_pacing_rate >> sk->sk_pacing_shift);
- limit = min_t(unsigned long, limit,
- sock_net(sk)->ipv4.sysctl_tcp_limit_output_bytes);
+ if (sk->sk_pacing_status == SK_PACING_NONE)
+ limit = min_t(unsigned long, limit,
+ sock_net(sk)->ipv4.sysctl_tcp_limit_output_bytes);
limit <<= factor;
if (refcount_read(&sk->sk_wmem_alloc) > limit) {
diff --git a/net/ipv4/tunnel4.c b/net/ipv4/tunnel4.c
index c0630013c1ae..33bf8e9c8663 100644
--- a/net/ipv4/tunnel4.c
+++ b/net/ipv4/tunnel4.c
@@ -149,34 +149,40 @@ drop:
}
#endif
-static void tunnel4_err(struct sk_buff *skb, u32 info)
+static int tunnel4_err(struct sk_buff *skb, u32 info)
{
struct xfrm_tunnel *handler;
for_each_tunnel_rcu(tunnel4_handlers, handler)
if (!handler->err_handler(skb, info))
- break;
+ return 0;
+
+ return -ENOENT;
}
#if IS_ENABLED(CONFIG_IPV6)
-static void tunnel64_err(struct sk_buff *skb, u32 info)
+static int tunnel64_err(struct sk_buff *skb, u32 info)
{
struct xfrm_tunnel *handler;
for_each_tunnel_rcu(tunnel64_handlers, handler)
if (!handler->err_handler(skb, info))
- break;
+ return 0;
+
+ return -ENOENT;
}
#endif
#if IS_ENABLED(CONFIG_MPLS)
-static void tunnelmpls4_err(struct sk_buff *skb, u32 info)
+static int tunnelmpls4_err(struct sk_buff *skb, u32 info)
{
struct xfrm_tunnel *handler;
for_each_tunnel_rcu(tunnelmpls4_handlers, handler)
if (!handler->err_handler(skb, info))
- break;
+ return 0;
+
+ return -ENOENT;
}
#endif
diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
index 1976fddb9e00..3fb0ed5e4789 100644
--- a/net/ipv4/udp.c
+++ b/net/ipv4/udp.c
@@ -105,6 +105,7 @@
#include <net/net_namespace.h>
#include <net/icmp.h>
#include <net/inet_hashtables.h>
+#include <net/ip_tunnels.h>
#include <net/route.h>
#include <net/checksum.h>
#include <net/xfrm.h>
@@ -115,6 +116,7 @@
#include "udp_impl.h"
#include <net/sock_reuseport.h>
#include <net/addrconf.h>
+#include <net/udp_tunnel.h>
struct udp_table udp_table __read_mostly;
EXPORT_SYMBOL(udp_table);
@@ -371,21 +373,19 @@ static int compute_score(struct sock *sk, struct net *net,
{
int score;
struct inet_sock *inet;
+ bool dev_match;
if (!net_eq(sock_net(sk), net) ||
udp_sk(sk)->udp_port_hash != hnum ||
ipv6_only_sock(sk))
return -1;
- score = (sk->sk_family == PF_INET) ? 2 : 1;
- inet = inet_sk(sk);
+ if (sk->sk_rcv_saddr != daddr)
+ return -1;
- if (inet->inet_rcv_saddr) {
- if (inet->inet_rcv_saddr != daddr)
- return -1;
- score += 4;
- }
+ score = (sk->sk_family == PF_INET) ? 2 : 1;
+ inet = inet_sk(sk);
if (inet->inet_daddr) {
if (inet->inet_daddr != saddr)
return -1;
@@ -398,15 +398,11 @@ static int compute_score(struct sock *sk, struct net *net,
score += 4;
}
- if (sk->sk_bound_dev_if || exact_dif) {
- bool dev_match = (sk->sk_bound_dev_if == dif ||
- sk->sk_bound_dev_if == sdif);
-
- if (!dev_match)
- return -1;
- if (sk->sk_bound_dev_if)
- score += 4;
- }
+ dev_match = udp_sk_bound_dev_eq(net, sk->sk_bound_dev_if,
+ dif, sdif);
+ if (!dev_match)
+ return -1;
+ score += 4;
if (sk->sk_incoming_cpu == raw_smp_processor_id())
score++;
@@ -465,65 +461,30 @@ struct sock *__udp4_lib_lookup(struct net *net, __be32 saddr,
__be16 sport, __be32 daddr, __be16 dport, int dif,
int sdif, struct udp_table *udptable, struct sk_buff *skb)
{
- struct sock *sk, *result;
+ struct sock *result;
unsigned short hnum = ntohs(dport);
- unsigned int hash2, slot2, slot = udp_hashfn(net, hnum, udptable->mask);
- struct udp_hslot *hslot2, *hslot = &udptable->hash[slot];
+ unsigned int hash2, slot2;
+ struct udp_hslot *hslot2;
bool exact_dif = udp_lib_exact_dif_match(net, skb);
- int score, badness;
- u32 hash = 0;
- if (hslot->count > 10) {
- hash2 = ipv4_portaddr_hash(net, daddr, hnum);
+ hash2 = ipv4_portaddr_hash(net, daddr, hnum);
+ slot2 = hash2 & udptable->mask;
+ hslot2 = &udptable->hash2[slot2];
+
+ result = udp4_lib_lookup2(net, saddr, sport,
+ daddr, hnum, dif, sdif,
+ exact_dif, hslot2, skb);
+ if (!result) {
+ hash2 = ipv4_portaddr_hash(net, htonl(INADDR_ANY), hnum);
slot2 = hash2 & udptable->mask;
hslot2 = &udptable->hash2[slot2];
- if (hslot->count < hslot2->count)
- goto begin;
result = udp4_lib_lookup2(net, saddr, sport,
- daddr, hnum, dif, sdif,
+ htonl(INADDR_ANY), hnum, dif, sdif,
exact_dif, hslot2, skb);
- if (!result) {
- unsigned int old_slot2 = slot2;
- hash2 = ipv4_portaddr_hash(net, htonl(INADDR_ANY), hnum);
- slot2 = hash2 & udptable->mask;
- /* avoid searching the same slot again. */
- if (unlikely(slot2 == old_slot2))
- return result;
-
- hslot2 = &udptable->hash2[slot2];
- if (hslot->count < hslot2->count)
- goto begin;
-
- result = udp4_lib_lookup2(net, saddr, sport,
- daddr, hnum, dif, sdif,
- exact_dif, hslot2, skb);
- }
- if (unlikely(IS_ERR(result)))
- return NULL;
- return result;
- }
-begin:
- result = NULL;
- badness = 0;
- sk_for_each_rcu(sk, &hslot->head) {
- score = compute_score(sk, net, saddr, sport,
- daddr, hnum, dif, sdif, exact_dif);
- if (score > badness) {
- if (sk->sk_reuseport) {
- hash = udp_ehashfn(net, daddr, hnum,
- saddr, sport);
- result = reuseport_select_sock(sk, hash, skb,
- sizeof(struct udphdr));
- if (unlikely(IS_ERR(result)))
- return NULL;
- if (result)
- return result;
- }
- result = sk;
- badness = score;
- }
}
+ if (unlikely(IS_ERR(result)))
+ return NULL;
return result;
}
EXPORT_SYMBOL_GPL(__udp4_lib_lookup);
@@ -585,6 +546,89 @@ static inline bool __udp_is_mcast_sock(struct net *net, struct sock *sk,
return true;
}
+DEFINE_STATIC_KEY_FALSE(udp_encap_needed_key);
+void udp_encap_enable(void)
+{
+ static_branch_inc(&udp_encap_needed_key);
+}
+EXPORT_SYMBOL(udp_encap_enable);
+
+/* Handler for tunnels with arbitrary destination ports: no socket lookup, go
+ * through error handlers in encapsulations looking for a match.
+ */
+static int __udp4_lib_err_encap_no_sk(struct sk_buff *skb, u32 info)
+{
+ int i;
+
+ for (i = 0; i < MAX_IPTUN_ENCAP_OPS; i++) {
+ int (*handler)(struct sk_buff *skb, u32 info);
+
+ if (!iptun_encaps[i])
+ continue;
+ handler = rcu_dereference(iptun_encaps[i]->err_handler);
+ if (handler && !handler(skb, info))
+ return 0;
+ }
+
+ return -ENOENT;
+}
+
+/* Try to match ICMP errors to UDP tunnels by looking up a socket without
+ * reversing source and destination port: this will match tunnels that force the
+ * same destination port on both endpoints (e.g. VXLAN, GENEVE). Note that
+ * lwtunnels might actually break this assumption by being configured with
+ * different destination ports on endpoints, in this case we won't be able to
+ * trace ICMP messages back to them.
+ *
+ * If this doesn't match any socket, probe tunnels with arbitrary destination
+ * ports (e.g. FoU, GUE): there, the receiving socket is useless, as the port
+ * we've sent packets to won't necessarily match the local destination port.
+ *
+ * Then ask the tunnel implementation to match the error against a valid
+ * association.
+ *
+ * Return an error if we can't find a match, the socket if we need further
+ * processing, zero otherwise.
+ */
+static struct sock *__udp4_lib_err_encap(struct net *net,
+ const struct iphdr *iph,
+ struct udphdr *uh,
+ struct udp_table *udptable,
+ struct sk_buff *skb, u32 info)
+{
+ int network_offset, transport_offset;
+ struct sock *sk;
+
+ network_offset = skb_network_offset(skb);
+ transport_offset = skb_transport_offset(skb);
+
+ /* Network header needs to point to the outer IPv4 header inside ICMP */
+ skb_reset_network_header(skb);
+
+ /* Transport header needs to point to the UDP header */
+ skb_set_transport_header(skb, iph->ihl << 2);
+
+ sk = __udp4_lib_lookup(net, iph->daddr, uh->source,
+ iph->saddr, uh->dest, skb->dev->ifindex, 0,
+ udptable, NULL);
+ if (sk) {
+ int (*lookup)(struct sock *sk, struct sk_buff *skb);
+ struct udp_sock *up = udp_sk(sk);
+
+ lookup = READ_ONCE(up->encap_err_lookup);
+ if (!lookup || lookup(sk, skb))
+ sk = NULL;
+ }
+
+ if (!sk)
+ sk = ERR_PTR(__udp4_lib_err_encap_no_sk(skb, info));
+
+ skb_set_transport_header(skb, transport_offset);
+ skb_set_network_header(skb, network_offset);
+
+ return sk;
+}
+
/*
* This routine is called by the ICMP module when it gets some
* sort of error condition. If err < 0 then the socket should
@@ -596,13 +640,14 @@ static inline bool __udp_is_mcast_sock(struct net *net, struct sock *sk,
* to find the appropriate port.
*/
-void __udp4_lib_err(struct sk_buff *skb, u32 info, struct udp_table *udptable)
+int __udp4_lib_err(struct sk_buff *skb, u32 info, struct udp_table *udptable)
{
struct inet_sock *inet;
const struct iphdr *iph = (const struct iphdr *)skb->data;
struct udphdr *uh = (struct udphdr *)(skb->data+(iph->ihl<<2));
const int type = icmp_hdr(skb)->type;
const int code = icmp_hdr(skb)->code;
+ bool tunnel = false;
struct sock *sk;
int harderr;
int err;
@@ -612,8 +657,21 @@ void __udp4_lib_err(struct sk_buff *skb, u32 info, struct udp_table *udptable)
iph->saddr, uh->source, skb->dev->ifindex,
inet_sdif(skb), udptable, NULL);
if (!sk) {
- __ICMP_INC_STATS(net, ICMP_MIB_INERRORS);
- return; /* No socket for error */
+ /* No socket for error: try tunnels before discarding */
+ sk = ERR_PTR(-ENOENT);
+ if (static_branch_unlikely(&udp_encap_needed_key)) {
+ sk = __udp4_lib_err_encap(net, iph, uh, udptable, skb,
+ info);
+ if (!sk)
+ return 0;
+ }
+
+ if (IS_ERR(sk)) {
+ __ICMP_INC_STATS(net, ICMP_MIB_INERRORS);
+ return PTR_ERR(sk);
+ }
+
+ tunnel = true;
}
err = 0;
@@ -656,6 +714,10 @@ void __udp4_lib_err(struct sk_buff *skb, u32 info, struct udp_table *udptable)
* RFC1122: OK. Passes ICMP errors back to application, as per
* 4.1.3.3.
*/
+ if (tunnel) {
+ /* ...not for tunnels though: we don't have a sending socket */
+ goto out;
+ }
if (!inet->recverr) {
if (!harderr || sk->sk_state != TCP_ESTABLISHED)
goto out;
@@ -665,12 +727,12 @@ void __udp4_lib_err(struct sk_buff *skb, u32 info, struct udp_table *udptable)
sk->sk_err = err;
sk->sk_error_report(sk);
out:
- return;
+ return 0;
}
-void udp_err(struct sk_buff *skb, u32 info)
+int udp_err(struct sk_buff *skb, u32 info)
{
- __udp4_lib_err(skb, info, &udp_table);
+ return __udp4_lib_err(skb, info, &udp_table);
}
/*
@@ -1713,6 +1775,10 @@ try_again:
memset(sin->sin_zero, 0, sizeof(sin->sin_zero));
*addr_len = sizeof(*sin);
}
+
+ if (udp_sk(sk)->gro_enabled)
+ udp_cmsg_recv(msg, sk, skb);
+
if (inet->cmsg_flags)
ip_cmsg_recv_offset(msg, sk, skb, sizeof(struct udphdr), off);
@@ -1889,13 +1955,6 @@ static int __udp_queue_rcv_skb(struct sock *sk, struct sk_buff *skb)
return 0;
}
-DEFINE_STATIC_KEY_FALSE(udp_encap_needed_key);
-void udp_encap_enable(void)
-{
- static_branch_enable(&udp_encap_needed_key);
-}
-EXPORT_SYMBOL(udp_encap_enable);
-
/* returns:
* -1: error
* 0: success
@@ -1904,7 +1963,7 @@ EXPORT_SYMBOL(udp_encap_enable);
* Note that in the success and error cases, the skb is assumed to
* have either been requeued or freed.
*/
-static int udp_queue_rcv_skb(struct sock *sk, struct sk_buff *skb)
+static int udp_queue_rcv_one_skb(struct sock *sk, struct sk_buff *skb)
{
struct udp_sock *up = udp_sk(sk);
int is_udplite = IS_UDPLITE(sk);
@@ -2007,6 +2066,27 @@ drop:
return -1;
}
+static int udp_queue_rcv_skb(struct sock *sk, struct sk_buff *skb)
+{
+ struct sk_buff *next, *segs;
+ int ret;
+
+ if (likely(!udp_unexpected_gso(sk, skb)))
+ return udp_queue_rcv_one_skb(sk, skb);
+
+ BUILD_BUG_ON(sizeof(struct udp_skb_cb) > SKB_SGO_CB_OFFSET);
+ __skb_push(skb, -skb_mac_offset(skb));
+ segs = udp_rcv_segment(sk, skb, true);
+ for (skb = segs; skb; skb = next) {
+ next = skb->next;
+ __skb_pull(skb, skb_transport_offset(skb));
+ ret = udp_queue_rcv_one_skb(sk, skb);
+ if (ret > 0)
+ ip_protocol_deliver_rcu(dev_net(skb->dev), skb, -ret);
+ }
+ return 0;
+}
+
/* For TCP sockets, sk_rx_dst is protected by socket lock
* For UDP, we use xchg() to guard against concurrent changes.
*/
@@ -2398,11 +2478,15 @@ void udp_destroy_sock(struct sock *sk)
bool slow = lock_sock_fast(sk);
udp_flush_pending_frames(sk);
unlock_sock_fast(sk, slow);
- if (static_branch_unlikely(&udp_encap_needed_key) && up->encap_type) {
- void (*encap_destroy)(struct sock *sk);
- encap_destroy = READ_ONCE(up->encap_destroy);
- if (encap_destroy)
- encap_destroy(sk);
+ if (static_branch_unlikely(&udp_encap_needed_key)) {
+ if (up->encap_type) {
+ void (*encap_destroy)(struct sock *sk);
+ encap_destroy = READ_ONCE(up->encap_destroy);
+ if (encap_destroy)
+ encap_destroy(sk);
+ }
+ if (up->encap_enabled)
+ static_branch_dec(&udp_encap_needed_key);
}
}
@@ -2447,7 +2531,9 @@ int udp_lib_setsockopt(struct sock *sk, int level, int optname,
/* FALLTHROUGH */
case UDP_ENCAP_L2TPINUDP:
up->encap_type = val;
- udp_encap_enable();
+ lock_sock(sk);
+ udp_tunnel_encap_enable(sk->sk_socket);
+ release_sock(sk);
break;
default:
err = -ENOPROTOOPT;
@@ -2469,6 +2555,14 @@ int udp_lib_setsockopt(struct sock *sk, int level, int optname,
up->gso_size = val;
break;
+ case UDP_GRO:
+ lock_sock(sk);
+ if (valbool)
+ udp_tunnel_encap_enable(sk->sk_socket);
+ up->gro_enabled = valbool;
+ release_sock(sk);
+ break;
+
/*
* UDP-Lite's partial checksum coverage (RFC 3828).
*/
diff --git a/net/ipv4/udp_impl.h b/net/ipv4/udp_impl.h
index e7d18b140287..322672655419 100644
--- a/net/ipv4/udp_impl.h
+++ b/net/ipv4/udp_impl.h
@@ -7,7 +7,7 @@
#include <net/inet_common.h>
int __udp4_lib_rcv(struct sk_buff *, struct udp_table *, int);
-void __udp4_lib_err(struct sk_buff *, u32, struct udp_table *);
+int __udp4_lib_err(struct sk_buff *, u32, struct udp_table *);
int udp_v4_get_port(struct sock *sk, unsigned short snum);
diff --git a/net/ipv4/udp_offload.c b/net/ipv4/udp_offload.c
index 802f2bc00d69..64f9715173ac 100644
--- a/net/ipv4/udp_offload.c
+++ b/net/ipv4/udp_offload.c
@@ -13,6 +13,7 @@
#include <linux/skbuff.h>
#include <net/udp.h>
#include <net/protocol.h>
+#include <net/inet_common.h>
static struct sk_buff *__skb_udp_tunnel_segment(struct sk_buff *skb,
netdev_features_t features,
@@ -343,6 +344,56 @@ out:
return segs;
}
+#define UDP_GRO_CNT_MAX 64
+static struct sk_buff *udp_gro_receive_segment(struct list_head *head,
+ struct sk_buff *skb)
+{
+ struct udphdr *uh = udp_hdr(skb);
+ struct sk_buff *pp = NULL;
+ struct udphdr *uh2;
+ struct sk_buff *p;
+
+ /* requires non zero csum, for symmetry with GSO */
+ if (!uh->check) {
+ NAPI_GRO_CB(skb)->flush = 1;
+ return NULL;
+ }
+
+ /* pull encapsulating udp header */
+ skb_gro_pull(skb, sizeof(struct udphdr));
+ skb_gro_postpull_rcsum(skb, uh, sizeof(struct udphdr));
+
+ list_for_each_entry(p, head, list) {
+ if (!NAPI_GRO_CB(p)->same_flow)
+ continue;
+
+ uh2 = udp_hdr(p);
+
+ /* Match ports only, as csum is always non zero */
+ if ((*(u32 *)&uh->source != *(u32 *)&uh2->source)) {
+ NAPI_GRO_CB(p)->same_flow = 0;
+ continue;
+ }
+
+ /* Terminate the flow on len mismatch or if it grow "too much".
+ * Under small packet flood GRO count could elsewhere grow a lot
+ * leading to execessive truesize values
+ */
+ if (!skb_gro_receive(p, skb) &&
+ NAPI_GRO_CB(p)->count >= UDP_GRO_CNT_MAX)
+ pp = p;
+ else if (uh->len != uh2->len)
+ pp = p;
+
+ return pp;
+ }
+
+ /* mismatch, but we never need to flush */
+ return NULL;
+}
+
+INDIRECT_CALLABLE_DECLARE(struct sock *udp6_lib_lookup_skb(struct sk_buff *skb,
+ __be16 sport, __be16 dport));
struct sk_buff *udp_gro_receive(struct list_head *head, struct sk_buff *skb,
struct udphdr *uh, udp_lookup_t lookup)
{
@@ -353,23 +404,28 @@ struct sk_buff *udp_gro_receive(struct list_head *head, struct sk_buff *skb,
int flush = 1;
struct sock *sk;
+ rcu_read_lock();
+ sk = INDIRECT_CALL_INET(lookup, udp6_lib_lookup_skb,
+ udp4_lib_lookup_skb, skb, uh->source, uh->dest);
+ if (!sk)
+ goto out_unlock;
+
+ if (udp_sk(sk)->gro_enabled) {
+ pp = call_gro_receive(udp_gro_receive_segment, head, skb);
+ rcu_read_unlock();
+ return pp;
+ }
+
if (NAPI_GRO_CB(skb)->encap_mark ||
(skb->ip_summed != CHECKSUM_PARTIAL &&
NAPI_GRO_CB(skb)->csum_cnt == 0 &&
- !NAPI_GRO_CB(skb)->csum_valid))
- goto out;
+ !NAPI_GRO_CB(skb)->csum_valid) ||
+ !udp_sk(sk)->gro_receive)
+ goto out_unlock;
/* mark that this skb passed once through the tunnel gro layer */
NAPI_GRO_CB(skb)->encap_mark = 1;
- rcu_read_lock();
- sk = (*lookup)(skb, uh->source, uh->dest);
-
- if (sk && udp_sk(sk)->gro_receive)
- goto unflush;
- goto out_unlock;
-
-unflush:
flush = 0;
list_for_each_entry(p, head, list) {
@@ -394,14 +450,13 @@ unflush:
out_unlock:
rcu_read_unlock();
-out:
skb_gro_flush_final(skb, pp, flush);
return pp;
}
EXPORT_SYMBOL(udp_gro_receive);
-static struct sk_buff *udp4_gro_receive(struct list_head *head,
- struct sk_buff *skb)
+INDIRECT_CALLABLE_SCOPE
+struct sk_buff *udp4_gro_receive(struct list_head *head, struct sk_buff *skb)
{
struct udphdr *uh = udp_gro_udphdr(skb);
@@ -427,6 +482,19 @@ flush:
return NULL;
}
+static int udp_gro_complete_segment(struct sk_buff *skb)
+{
+ struct udphdr *uh = udp_hdr(skb);
+
+ skb->csum_start = (unsigned char *)uh - skb->head;
+ skb->csum_offset = offsetof(struct udphdr, check);
+ skb->ip_summed = CHECKSUM_PARTIAL;
+
+ skb_shinfo(skb)->gso_segs = NAPI_GRO_CB(skb)->count;
+ skb_shinfo(skb)->gso_type |= SKB_GSO_UDP_L4;
+ return 0;
+}
+
int udp_gro_complete(struct sk_buff *skb, int nhoff,
udp_lookup_t lookup)
{
@@ -437,16 +505,22 @@ int udp_gro_complete(struct sk_buff *skb, int nhoff,
uh->len = newlen;
- /* Set encapsulation before calling into inner gro_complete() functions
- * to make them set up the inner offsets.
- */
- skb->encapsulation = 1;
-
rcu_read_lock();
- sk = (*lookup)(skb, uh->source, uh->dest);
- if (sk && udp_sk(sk)->gro_complete)
+ sk = INDIRECT_CALL_INET(lookup, udp6_lib_lookup_skb,
+ udp4_lib_lookup_skb, skb, uh->source, uh->dest);
+ if (sk && udp_sk(sk)->gro_enabled) {
+ err = udp_gro_complete_segment(skb);
+ } else if (sk && udp_sk(sk)->gro_complete) {
+ skb_shinfo(skb)->gso_type = uh->check ? SKB_GSO_UDP_TUNNEL_CSUM
+ : SKB_GSO_UDP_TUNNEL;
+
+ /* Set encapsulation before calling into inner gro_complete()
+ * functions to make them set up the inner offsets.
+ */
+ skb->encapsulation = 1;
err = udp_sk(sk)->gro_complete(sk, skb,
nhoff + sizeof(struct udphdr));
+ }
rcu_read_unlock();
if (skb->remcsum_offload)
@@ -456,18 +530,14 @@ int udp_gro_complete(struct sk_buff *skb, int nhoff,
}
EXPORT_SYMBOL(udp_gro_complete);
-static int udp4_gro_complete(struct sk_buff *skb, int nhoff)
+INDIRECT_CALLABLE_SCOPE int udp4_gro_complete(struct sk_buff *skb, int nhoff)
{
const struct iphdr *iph = ip_hdr(skb);
struct udphdr *uh = (struct udphdr *)(skb->data + nhoff);
- if (uh->check) {
- skb_shinfo(skb)->gso_type |= SKB_GSO_UDP_TUNNEL_CSUM;
+ if (uh->check)
uh->check = ~udp_v4_check(skb->len - nhoff, iph->saddr,
iph->daddr, 0);
- } else {
- skb_shinfo(skb)->gso_type |= SKB_GSO_UDP_TUNNEL;
- }
return udp_gro_complete(skb, nhoff, udp4_lib_lookup_skb);
}
diff --git a/net/ipv4/udp_tunnel.c b/net/ipv4/udp_tunnel.c
index 6539ff15e9a3..be8b5b2157d8 100644
--- a/net/ipv4/udp_tunnel.c
+++ b/net/ipv4/udp_tunnel.c
@@ -20,6 +20,23 @@ int udp_sock_create4(struct net *net, struct udp_port_cfg *cfg,
if (err < 0)
goto error;
+ if (cfg->bind_ifindex) {
+ struct net_device *dev;
+
+ dev = dev_get_by_index(net, cfg->bind_ifindex);
+ if (!dev) {
+ err = -ENODEV;
+ goto error;
+ }
+
+ err = kernel_setsockopt(sock, SOL_SOCKET, SO_BINDTODEVICE,
+ dev->name, strlen(dev->name) + 1);
+ dev_put(dev);
+
+ if (err < 0)
+ goto error;
+ }
+
udp_addr.sin_family = AF_INET;
udp_addr.sin_addr = cfg->local_ip;
udp_addr.sin_port = cfg->local_udp_port;
@@ -68,6 +85,7 @@ void setup_udp_tunnel_sock(struct net *net, struct socket *sock,
udp_sk(sk)->encap_type = cfg->encap_type;
udp_sk(sk)->encap_rcv = cfg->encap_rcv;
+ udp_sk(sk)->encap_err_lookup = cfg->encap_err_lookup;
udp_sk(sk)->encap_destroy = cfg->encap_destroy;
udp_sk(sk)->gro_receive = cfg->gro_receive;
udp_sk(sk)->gro_complete = cfg->gro_complete;
diff --git a/net/ipv4/udplite.c b/net/ipv4/udplite.c
index 8545457752fb..39c7f17d916f 100644
--- a/net/ipv4/udplite.c
+++ b/net/ipv4/udplite.c
@@ -25,9 +25,9 @@ static int udplite_rcv(struct sk_buff *skb)
return __udp4_lib_rcv(skb, &udplite_table, IPPROTO_UDPLITE);
}
-static void udplite_err(struct sk_buff *skb, u32 info)
+static int udplite_err(struct sk_buff *skb, u32 info)
{
- __udp4_lib_err(skb, info, &udplite_table);
+ return __udp4_lib_err(skb, info, &udplite_table);
}
static const struct net_protocol udplite_protocol = {
diff --git a/net/ipv4/xfrm4_protocol.c b/net/ipv4/xfrm4_protocol.c
index 8dd0e6ab8606..35c54865dc42 100644
--- a/net/ipv4/xfrm4_protocol.c
+++ b/net/ipv4/xfrm4_protocol.c
@@ -106,13 +106,15 @@ static int xfrm4_esp_rcv(struct sk_buff *skb)
return 0;
}
-static void xfrm4_esp_err(struct sk_buff *skb, u32 info)
+static int xfrm4_esp_err(struct sk_buff *skb, u32 info)
{
struct xfrm4_protocol *handler;
for_each_protocol_rcu(esp4_handlers, handler)
if (!handler->err_handler(skb, info))
- break;
+ return 0;
+
+ return -ENOENT;
}
static int xfrm4_ah_rcv(struct sk_buff *skb)
@@ -132,13 +134,15 @@ static int xfrm4_ah_rcv(struct sk_buff *skb)
return 0;
}
-static void xfrm4_ah_err(struct sk_buff *skb, u32 info)
+static int xfrm4_ah_err(struct sk_buff *skb, u32 info)
{
struct xfrm4_protocol *handler;
for_each_protocol_rcu(ah4_handlers, handler)
if (!handler->err_handler(skb, info))
- break;
+ return 0;
+
+ return -ENOENT;
}
static int xfrm4_ipcomp_rcv(struct sk_buff *skb)
@@ -158,13 +162,15 @@ static int xfrm4_ipcomp_rcv(struct sk_buff *skb)
return 0;
}
-static void xfrm4_ipcomp_err(struct sk_buff *skb, u32 info)
+static int xfrm4_ipcomp_err(struct sk_buff *skb, u32 info)
{
struct xfrm4_protocol *handler;
for_each_protocol_rcu(ipcomp4_handlers, handler)
if (!handler->err_handler(skb, info))
- break;
+ return 0;
+
+ return -ENOENT;
}
static const struct net_protocol esp4_protocol = {
diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c
index 045597b9a7c0..521e471f1cf9 100644
--- a/net/ipv6/addrconf.c
+++ b/net/ipv6/addrconf.c
@@ -2820,7 +2820,7 @@ int addrconf_set_dstaddr(struct net *net, void __user *arg)
dev = __dev_get_by_name(net, p.name);
if (!dev)
goto err_exit;
- err = dev_open(dev);
+ err = dev_open(dev, NULL);
}
}
#endif
diff --git a/net/ipv6/anycast.c b/net/ipv6/anycast.c
index 94999058e110..cca3b3603c42 100644
--- a/net/ipv6/anycast.c
+++ b/net/ipv6/anycast.c
@@ -433,7 +433,6 @@ static bool ipv6_chk_acast_dev(struct net_device *dev, const struct in6_addr *ad
bool ipv6_chk_acast_addr(struct net *net, struct net_device *dev,
const struct in6_addr *addr)
{
- unsigned int hash = inet6_acaddr_hash(net, addr);
struct net_device *nh_dev;
struct ifacaddr6 *aca;
bool found = false;
@@ -441,7 +440,9 @@ bool ipv6_chk_acast_addr(struct net *net, struct net_device *dev,
rcu_read_lock();
if (dev)
found = ipv6_chk_acast_dev(dev, addr);
- else
+ else {
+ unsigned int hash = inet6_acaddr_hash(net, addr);
+
hlist_for_each_entry_rcu(aca, &inet6_acaddr_lst[hash],
aca_addr_lst) {
nh_dev = fib6_info_nh_dev(aca->aca_rt);
@@ -452,6 +453,7 @@ bool ipv6_chk_acast_addr(struct net *net, struct net_device *dev,
break;
}
}
+ }
rcu_read_unlock();
return found;
}
diff --git a/net/ipv6/datagram.c b/net/ipv6/datagram.c
index 1ede7a16a0be..bde08aa549f3 100644
--- a/net/ipv6/datagram.c
+++ b/net/ipv6/datagram.c
@@ -772,6 +772,7 @@ int ip6_datagram_send_ctl(struct net *net, struct sock *sk,
case IPV6_2292PKTINFO:
{
struct net_device *dev = NULL;
+ int src_idx;
if (cmsg->cmsg_len < CMSG_LEN(sizeof(struct in6_pktinfo))) {
err = -EINVAL;
@@ -779,12 +780,15 @@ int ip6_datagram_send_ctl(struct net *net, struct sock *sk,
}
src_info = (struct in6_pktinfo *)CMSG_DATA(cmsg);
+ src_idx = src_info->ipi6_ifindex;
- if (src_info->ipi6_ifindex) {
+ if (src_idx) {
if (fl6->flowi6_oif &&
- src_info->ipi6_ifindex != fl6->flowi6_oif)
+ src_idx != fl6->flowi6_oif &&
+ (sk->sk_bound_dev_if != fl6->flowi6_oif ||
+ !sk_dev_equal_l3scope(sk, src_idx)))
return -EINVAL;
- fl6->flowi6_oif = src_info->ipi6_ifindex;
+ fl6->flowi6_oif = src_idx;
}
addr_type = __ipv6_addr_type(&src_info->ipi6_addr);
diff --git a/net/ipv6/esp6.c b/net/ipv6/esp6.c
index 63b2b66f9dfa..5afe9f83374d 100644
--- a/net/ipv6/esp6.c
+++ b/net/ipv6/esp6.c
@@ -145,10 +145,13 @@ static void esp_output_done(struct crypto_async_request *base, int err)
void *tmp;
struct xfrm_state *x;
- if (xo && (xo->flags & XFRM_DEV_RESUME))
- x = skb->sp->xvec[skb->sp->len - 1];
- else
+ if (xo && (xo->flags & XFRM_DEV_RESUME)) {
+ struct sec_path *sp = skb_sec_path(skb);
+
+ x = sp->xvec[sp->len - 1];
+ } else {
x = skb_dst(skb)->xfrm;
+ }
tmp = ESP_SKB_CB(skb)->tmp;
esp_ssg_unref(x, tmp);
diff --git a/net/ipv6/esp6_offload.c b/net/ipv6/esp6_offload.c
index 6177e2171171..d46b4eb645c2 100644
--- a/net/ipv6/esp6_offload.c
+++ b/net/ipv6/esp6_offload.c
@@ -68,11 +68,12 @@ static struct sk_buff *esp6_gro_receive(struct list_head *head,
xo = xfrm_offload(skb);
if (!xo || !(xo->flags & CRYPTO_DONE)) {
- err = secpath_set(skb);
- if (err)
+ struct sec_path *sp = secpath_set(skb);
+
+ if (!sp)
goto out;
- if (skb->sp->len == XFRM_MAX_DEPTH)
+ if (sp->len == XFRM_MAX_DEPTH)
goto out;
x = xfrm_state_lookup(dev_net(skb->dev), skb->mark,
@@ -81,8 +82,8 @@ static struct sk_buff *esp6_gro_receive(struct list_head *head,
if (!x)
goto out;
- skb->sp->xvec[skb->sp->len++] = x;
- skb->sp->olen++;
+ sp->xvec[sp->len++] = x;
+ sp->olen++;
xo = xfrm_offload(skb);
if (!xo) {
@@ -141,6 +142,7 @@ static struct sk_buff *esp6_gso_segment(struct sk_buff *skb,
struct crypto_aead *aead;
netdev_features_t esp_features = features;
struct xfrm_offload *xo = xfrm_offload(skb);
+ struct sec_path *sp;
if (!xo)
return ERR_PTR(-EINVAL);
@@ -148,7 +150,8 @@ static struct sk_buff *esp6_gso_segment(struct sk_buff *skb,
if (!(skb_shinfo(skb)->gso_type & SKB_GSO_ESP))
return ERR_PTR(-EINVAL);
- x = skb->sp->xvec[skb->sp->len - 1];
+ sp = skb_sec_path(skb);
+ x = sp->xvec[sp->len - 1];
aead = x->data;
esph = ip_esp_hdr(skb);
diff --git a/net/ipv6/fou6.c b/net/ipv6/fou6.c
index 6de3c04b0f30..bd675c61deb1 100644
--- a/net/ipv6/fou6.c
+++ b/net/ipv6/fou6.c
@@ -4,6 +4,7 @@
#include <linux/skbuff.h>
#include <linux/ip.h>
#include <linux/udp.h>
+#include <linux/icmpv6.h>
#include <linux/types.h>
#include <linux/kernel.h>
#include <net/fou.h>
@@ -69,14 +70,87 @@ static int gue6_build_header(struct sk_buff *skb, struct ip_tunnel_encap *e,
return 0;
}
+static int gue6_err_proto_handler(int proto, struct sk_buff *skb,
+ struct inet6_skb_parm *opt,
+ u8 type, u8 code, int offset, u32 info)
+{
+ const struct inet6_protocol *ipprot;
+
+ ipprot = rcu_dereference(inet6_protos[proto]);
+ if (ipprot && ipprot->err_handler) {
+ if (!ipprot->err_handler(skb, opt, type, code, offset, info))
+ return 0;
+ }
+
+ return -ENOENT;
+}
+
+static int gue6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
+ u8 type, u8 code, int offset, __be32 info)
+{
+ int transport_offset = skb_transport_offset(skb);
+ struct guehdr *guehdr;
+ size_t optlen;
+ int ret;
+
+ if (skb->len < sizeof(struct udphdr) + sizeof(struct guehdr))
+ return -EINVAL;
+
+ guehdr = (struct guehdr *)&udp_hdr(skb)[1];
+
+ switch (guehdr->version) {
+ case 0: /* Full GUE header present */
+ break;
+ case 1: {
+ /* Direct encasulation of IPv4 or IPv6 */
+ skb_set_transport_header(skb, -(int)sizeof(struct icmp6hdr));
+
+ switch (((struct iphdr *)guehdr)->version) {
+ case 4:
+ ret = gue6_err_proto_handler(IPPROTO_IPIP, skb, opt,
+ type, code, offset, info);
+ goto out;
+ case 6:
+ ret = gue6_err_proto_handler(IPPROTO_IPV6, skb, opt,
+ type, code, offset, info);
+ goto out;
+ default:
+ ret = -EOPNOTSUPP;
+ goto out;
+ }
+ }
+ default: /* Undefined version */
+ return -EOPNOTSUPP;
+ }
+
+ if (guehdr->control)
+ return -ENOENT;
+
+ optlen = guehdr->hlen << 2;
+
+ if (validate_gue_flags(guehdr, optlen))
+ return -EINVAL;
+
+ skb_set_transport_header(skb, -(int)sizeof(struct icmp6hdr));
+ ret = gue6_err_proto_handler(guehdr->proto_ctype, skb,
+ opt, type, code, offset, info);
+
+out:
+ skb_set_transport_header(skb, transport_offset);
+ return ret;
+}
+
+
static const struct ip6_tnl_encap_ops fou_ip6tun_ops = {
.encap_hlen = fou_encap_hlen,
.build_header = fou6_build_header,
+ .err_handler = gue6_err,
};
static const struct ip6_tnl_encap_ops gue_ip6tun_ops = {
.encap_hlen = gue_encap_hlen,
.build_header = gue6_build_header,
+ .err_handler = gue6_err,
};
static int ip6_tnl_encap_add_fou_ops(void)
diff --git a/net/ipv6/icmp.c b/net/ipv6/icmp.c
index c9c53ade55c3..5d7aa2c2770c 100644
--- a/net/ipv6/icmp.c
+++ b/net/ipv6/icmp.c
@@ -84,7 +84,7 @@ static inline struct sock *icmpv6_sk(struct net *net)
return net->ipv6.icmp_sk[smp_processor_id()];
}
-static void icmpv6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
+static int icmpv6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
u8 type, u8 code, int offset, __be32 info)
{
/* icmpv6_notify checks 8 bytes can be pulled, icmp6hdr is 8 bytes */
@@ -100,6 +100,8 @@ static void icmpv6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
if (!(type & ICMPV6_INFOMSG_MASK))
if (icmp6->icmp6_type == ICMPV6_ECHO_REQUEST)
ping_err(skb, offset, ntohl(info));
+
+ return 0;
}
static int icmpv6_rcv(struct sk_buff *skb);
diff --git a/net/ipv6/inet6_hashtables.c b/net/ipv6/inet6_hashtables.c
index 3d7c7460a0c5..f3515ebe9b3a 100644
--- a/net/ipv6/inet6_hashtables.c
+++ b/net/ipv6/inet6_hashtables.c
@@ -102,22 +102,13 @@ static inline int compute_score(struct sock *sk, struct net *net,
if (net_eq(sock_net(sk), net) && inet_sk(sk)->inet_num == hnum &&
sk->sk_family == PF_INET6) {
+ if (!ipv6_addr_equal(&sk->sk_v6_rcv_saddr, daddr))
+ return -1;
+
+ if (!inet_sk_bound_dev_eq(net, sk->sk_bound_dev_if, dif, sdif))
+ return -1;
score = 1;
- if (!ipv6_addr_any(&sk->sk_v6_rcv_saddr)) {
- if (!ipv6_addr_equal(&sk->sk_v6_rcv_saddr, daddr))
- return -1;
- score++;
- }
- if (sk->sk_bound_dev_if || exact_dif) {
- bool dev_match = (sk->sk_bound_dev_if == dif ||
- sk->sk_bound_dev_if == sdif);
-
- if (!dev_match)
- return -1;
- if (sk->sk_bound_dev_if)
- score++;
- }
if (sk->sk_incoming_cpu == raw_smp_processor_id())
score++;
}
@@ -166,26 +157,12 @@ struct sock *inet6_lookup_listener(struct net *net,
const __be16 sport, const struct in6_addr *daddr,
const unsigned short hnum, const int dif, const int sdif)
{
- unsigned int hash = inet_lhashfn(net, hnum);
- struct inet_listen_hashbucket *ilb = &hashinfo->listening_hash[hash];
- bool exact_dif = inet6_exact_dif_match(net, skb);
struct inet_listen_hashbucket *ilb2;
- struct sock *sk, *result = NULL;
- int score, hiscore = 0;
+ struct sock *result = NULL;
unsigned int hash2;
- u32 phash = 0;
-
- if (ilb->count <= 10 || !hashinfo->lhash2)
- goto port_lookup;
-
- /* Too many sk in the ilb bucket (which is hashed by port alone).
- * Try lhash2 (which is hashed by port and addr) instead.
- */
hash2 = ipv6_portaddr_hash(net, daddr, hnum);
ilb2 = inet_lhash2_bucket(hashinfo, hash2);
- if (ilb2->count > ilb->count)
- goto port_lookup;
result = inet6_lhash2_lookup(net, ilb2, skb, doff,
saddr, sport, daddr, hnum,
@@ -194,33 +171,12 @@ struct sock *inet6_lookup_listener(struct net *net,
goto done;
/* Lookup lhash2 with in6addr_any */
-
hash2 = ipv6_portaddr_hash(net, &in6addr_any, hnum);
ilb2 = inet_lhash2_bucket(hashinfo, hash2);
- if (ilb2->count > ilb->count)
- goto port_lookup;
result = inet6_lhash2_lookup(net, ilb2, skb, doff,
- saddr, sport, daddr, hnum,
+ saddr, sport, &in6addr_any, hnum,
dif, sdif);
- goto done;
-
-port_lookup:
- sk_for_each(sk, &ilb->head) {
- score = compute_score(sk, net, hnum, daddr, dif, sdif, exact_dif);
- if (score > hiscore) {
- if (sk->sk_reuseport) {
- phash = inet6_ehashfn(net, daddr, hnum,
- saddr, sport);
- result = reuseport_select_sock(sk, phash,
- skb, doff);
- if (result)
- goto done;
- }
- result = sk;
- hiscore = score;
- }
- }
done:
if (unlikely(IS_ERR(result)))
return NULL;
diff --git a/net/ipv6/ip6_gre.c b/net/ipv6/ip6_gre.c
index 515adbdba1d2..229e55c99021 100644
--- a/net/ipv6/ip6_gre.c
+++ b/net/ipv6/ip6_gre.c
@@ -423,7 +423,7 @@ static void ip6gre_tunnel_uninit(struct net_device *dev)
}
-static void ip6gre_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
+static int ip6gre_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
u8 type, u8 code, int offset, __be32 info)
{
struct net *net = dev_net(skb->dev);
@@ -433,13 +433,13 @@ static void ip6gre_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
if (gre_parse_header(skb, &tpi, NULL, htons(ETH_P_IPV6),
offset) < 0)
- return;
+ return -EINVAL;
ipv6h = (const struct ipv6hdr *)skb->data;
t = ip6gre_tunnel_lookup(skb->dev, &ipv6h->daddr, &ipv6h->saddr,
tpi.key, tpi.proto);
if (!t)
- return;
+ return -ENOENT;
switch (type) {
struct ipv6_tlv_tnl_enc_lim *tel;
@@ -449,14 +449,14 @@ static void ip6gre_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
t->parms.name);
if (code != ICMPV6_PORT_UNREACH)
break;
- return;
+ return 0;
case ICMPV6_TIME_EXCEED:
if (code == ICMPV6_EXC_HOPLIMIT) {
net_dbg_ratelimited("%s: Too small hop limit or routing loop in tunnel!\n",
t->parms.name);
break;
}
- return;
+ return 0;
case ICMPV6_PARAMPROB:
teli = 0;
if (code == ICMPV6_HDR_FIELD)
@@ -472,14 +472,14 @@ static void ip6gre_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
net_dbg_ratelimited("%s: Recipient unable to parse tunneled packet!\n",
t->parms.name);
}
- return;
+ return 0;
case ICMPV6_PKT_TOOBIG:
ip6_update_pmtu(skb, net, info, 0, 0, sock_net_uid(net, NULL));
- return;
+ return 0;
case NDISC_REDIRECT:
ip6_redirect(skb, net, skb->dev->ifindex, 0,
sock_net_uid(net, NULL));
- return;
+ return 0;
}
if (time_before(jiffies, t->err_time + IP6TUNNEL_ERR_TIMEO))
@@ -487,6 +487,8 @@ static void ip6gre_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
else
t->err_count = 1;
t->err_time = jiffies;
+
+ return 0;
}
static int ip6gre_rcv(struct sk_buff *skb, const struct tnl_ptk_info *tpi)
@@ -1883,12 +1885,6 @@ static void ip6gre_tap_setup(struct net_device *dev)
netif_keep_dst(dev);
}
-bool is_ip6gretap_dev(const struct net_device *dev)
-{
- return dev->netdev_ops == &ip6gre_tap_netdev_ops;
-}
-EXPORT_SYMBOL_GPL(is_ip6gretap_dev);
-
static bool ip6gre_netlink_encap_parms(struct nlattr *data[],
struct ip_tunnel_encap *ipencap)
{
diff --git a/net/ipv6/ip6_input.c b/net/ipv6/ip6_input.c
index c1d85830c906..c7ed2b6d5a1d 100644
--- a/net/ipv6/ip6_input.c
+++ b/net/ipv6/ip6_input.c
@@ -319,28 +319,26 @@ void ipv6_list_rcv(struct list_head *head, struct packet_type *pt,
/*
* Deliver the packet to the host
*/
-
-
-static int ip6_input_finish(struct net *net, struct sock *sk, struct sk_buff *skb)
+void ip6_protocol_deliver_rcu(struct net *net, struct sk_buff *skb, int nexthdr,
+ bool have_final)
{
const struct inet6_protocol *ipprot;
struct inet6_dev *idev;
unsigned int nhoff;
- int nexthdr;
bool raw;
- bool have_final = false;
/*
* Parse extension headers
*/
- rcu_read_lock();
resubmit:
idev = ip6_dst_idev(skb_dst(skb));
- if (!pskb_pull(skb, skb_transport_offset(skb)))
- goto discard;
nhoff = IP6CB(skb)->nhoff;
- nexthdr = skb_network_header(skb)[nhoff];
+ if (!have_final) {
+ if (!pskb_pull(skb, skb_transport_offset(skb)))
+ goto discard;
+ nexthdr = skb_network_header(skb)[nhoff];
+ }
resubmit_final:
raw = raw6_local_deliver(skb, nexthdr);
@@ -359,6 +357,8 @@ resubmit_final:
}
} else if (ipprot->flags & INET6_PROTO_FINAL) {
const struct ipv6hdr *hdr;
+ int sdif = inet6_sdif(skb);
+ struct net_device *dev;
/* Only do this once for first final protocol */
have_final = true;
@@ -371,9 +371,19 @@ resubmit_final:
skb_postpull_rcsum(skb, skb_network_header(skb),
skb_network_header_len(skb));
hdr = ipv6_hdr(skb);
+
+ /* skb->dev passed may be master dev for vrfs. */
+ if (sdif) {
+ dev = dev_get_by_index_rcu(net, sdif);
+ if (!dev)
+ goto discard;
+ } else {
+ dev = skb->dev;
+ }
+
if (ipv6_addr_is_multicast(&hdr->daddr) &&
- !ipv6_chk_mcast_addr(skb->dev, &hdr->daddr,
- &hdr->saddr) &&
+ !ipv6_chk_mcast_addr(dev, &hdr->daddr,
+ &hdr->saddr) &&
!ipv6_is_mld(skb, nexthdr, skb_network_header_len(skb)))
goto discard;
}
@@ -411,13 +421,19 @@ resubmit_final:
consume_skb(skb);
}
}
- rcu_read_unlock();
- return 0;
+ return;
discard:
__IP6_INC_STATS(net, idev, IPSTATS_MIB_INDISCARDS);
- rcu_read_unlock();
kfree_skb(skb);
+}
+
+static int ip6_input_finish(struct net *net, struct sock *sk, struct sk_buff *skb)
+{
+ rcu_read_lock();
+ ip6_protocol_deliver_rcu(net, skb, 0, false);
+ rcu_read_unlock();
+
return 0;
}
@@ -432,15 +448,32 @@ EXPORT_SYMBOL_GPL(ip6_input);
int ip6_mc_input(struct sk_buff *skb)
{
+ int sdif = inet6_sdif(skb);
const struct ipv6hdr *hdr;
+ struct net_device *dev;
bool deliver;
__IP6_UPD_PO_STATS(dev_net(skb_dst(skb)->dev),
__in6_dev_get_safely(skb->dev), IPSTATS_MIB_INMCAST,
skb->len);
+ /* skb->dev passed may be master dev for vrfs. */
+ if (sdif) {
+ rcu_read_lock();
+ dev = dev_get_by_index_rcu(dev_net(skb->dev), sdif);
+ if (!dev) {
+ rcu_read_unlock();
+ kfree_skb(skb);
+ return -ENODEV;
+ }
+ } else {
+ dev = skb->dev;
+ }
+
hdr = ipv6_hdr(skb);
- deliver = ipv6_chk_mcast_addr(skb->dev, &hdr->daddr, NULL);
+ deliver = ipv6_chk_mcast_addr(dev, &hdr->daddr, NULL);
+ if (sdif)
+ rcu_read_unlock();
#ifdef CONFIG_IPV6_MROUTE
/*
diff --git a/net/ipv6/ip6_offload.c b/net/ipv6/ip6_offload.c
index c7e495f12011..5c045691c302 100644
--- a/net/ipv6/ip6_offload.c
+++ b/net/ipv6/ip6_offload.c
@@ -20,6 +20,23 @@
#include "ip6_offload.h"
+/* All GRO functions are always builtin, except UDP over ipv6, which lays in
+ * ipv6 module, as it depends on UDPv6 lookup function, so we need special care
+ * when ipv6 is built as a module
+ */
+#if IS_BUILTIN(CONFIG_IPV6)
+#define INDIRECT_CALL_L4(f, f2, f1, ...) INDIRECT_CALL_2(f, f2, f1, __VA_ARGS__)
+#else
+#define INDIRECT_CALL_L4(f, f2, f1, ...) INDIRECT_CALL_1(f, f2, __VA_ARGS__)
+#endif
+
+#define indirect_call_gro_receive_l4(f2, f1, cb, head, skb) \
+({ \
+ unlikely(gro_recursion_inc_test(skb)) ? \
+ NAPI_GRO_CB(skb)->flush |= 1, NULL : \
+ INDIRECT_CALL_L4(cb, f2, f1, head, skb); \
+})
+
static int ipv6_gso_pull_exthdrs(struct sk_buff *skb, int proto)
{
const struct net_offload *ops = NULL;
@@ -164,8 +181,12 @@ static int ipv6_exthdrs_len(struct ipv6hdr *iph,
return len;
}
-static struct sk_buff *ipv6_gro_receive(struct list_head *head,
- struct sk_buff *skb)
+INDIRECT_CALLABLE_DECLARE(struct sk_buff *tcp6_gro_receive(struct list_head *,
+ struct sk_buff *));
+INDIRECT_CALLABLE_DECLARE(struct sk_buff *udp6_gro_receive(struct list_head *,
+ struct sk_buff *));
+INDIRECT_CALLABLE_SCOPE struct sk_buff *ipv6_gro_receive(struct list_head *head,
+ struct sk_buff *skb)
{
const struct net_offload *ops;
struct sk_buff *pp = NULL;
@@ -229,14 +250,21 @@ static struct sk_buff *ipv6_gro_receive(struct list_head *head,
* XXX skbs on the gro_list have all been parsed and pulled
* already so we don't need to compare nlen
* (nlen != (sizeof(*iph2) + ipv6_exthdrs_len(iph2, &ops)))
- * memcmp() alone below is suffcient, right?
+ * memcmp() alone below is sufficient, right?
*/
if ((first_word & htonl(0xF00FFFFF)) ||
- memcmp(&iph->nexthdr, &iph2->nexthdr,
- nlen - offsetof(struct ipv6hdr, nexthdr))) {
+ !ipv6_addr_equal(&iph->saddr, &iph2->saddr) ||
+ !ipv6_addr_equal(&iph->daddr, &iph2->daddr) ||
+ *(u16 *)&iph->nexthdr != *(u16 *)&iph2->nexthdr) {
+not_same_flow:
NAPI_GRO_CB(p)->same_flow = 0;
continue;
}
+ if (unlikely(nlen > sizeof(struct ipv6hdr))) {
+ if (memcmp(iph + 1, iph2 + 1,
+ nlen - sizeof(struct ipv6hdr)))
+ goto not_same_flow;
+ }
/* flush if Traffic Class fields are different */
NAPI_GRO_CB(p)->flush |= !!(first_word & htonl(0x0FF00000));
NAPI_GRO_CB(p)->flush |= flush;
@@ -253,7 +281,8 @@ static struct sk_buff *ipv6_gro_receive(struct list_head *head,
skb_gro_postpull_rcsum(skb, iph, nlen);
- pp = call_gro_receive(ops->callbacks.gro_receive, head, skb);
+ pp = indirect_call_gro_receive_l4(tcp6_gro_receive, udp6_gro_receive,
+ ops->callbacks.gro_receive, head, skb);
out_unlock:
rcu_read_unlock();
@@ -294,7 +323,9 @@ static struct sk_buff *ip4ip6_gro_receive(struct list_head *head,
return inet_gro_receive(head, skb);
}
-static int ipv6_gro_complete(struct sk_buff *skb, int nhoff)
+INDIRECT_CALLABLE_DECLARE(int tcp6_gro_complete(struct sk_buff *, int));
+INDIRECT_CALLABLE_DECLARE(int udp6_gro_complete(struct sk_buff *, int));
+INDIRECT_CALLABLE_SCOPE int ipv6_gro_complete(struct sk_buff *skb, int nhoff)
{
const struct net_offload *ops;
struct ipv6hdr *iph = (struct ipv6hdr *)(skb->data + nhoff);
@@ -313,7 +344,8 @@ static int ipv6_gro_complete(struct sk_buff *skb, int nhoff)
if (WARN_ON(!ops || !ops->callbacks.gro_complete))
goto out_unlock;
- err = ops->callbacks.gro_complete(skb, nhoff);
+ err = INDIRECT_CALL_L4(ops->callbacks.gro_complete, tcp6_gro_complete,
+ udp6_gro_complete, skb, nhoff);
out_unlock:
rcu_read_unlock();
diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c
index 4591ca4bdbe8..5f9fa0302b5a 100644
--- a/net/ipv6/ip6_output.c
+++ b/net/ipv6/ip6_output.c
@@ -378,6 +378,13 @@ static inline int ip6_forward_finish(struct net *net, struct sock *sk,
__IP6_INC_STATS(net, ip6_dst_idev(dst), IPSTATS_MIB_OUTFORWDATAGRAMS);
__IP6_ADD_STATS(net, ip6_dst_idev(dst), IPSTATS_MIB_OUTOCTETS, skb->len);
+#ifdef CONFIG_NET_SWITCHDEV
+ if (skb->offload_l3_fwd_mark) {
+ consume_skb(skb);
+ return 0;
+ }
+#endif
+
skb->tstamp = 0;
return dst_output(net, sk, skb);
}
@@ -575,6 +582,7 @@ static void ip6_copy_metadata(struct sk_buff *to, struct sk_buff *from)
to->tc_index = from->tc_index;
#endif
nf_copy(to, from);
+ skb_ext_copy(to, from);
skb_copy_secmark(to, from);
}
@@ -1246,6 +1254,7 @@ static int __ip6_append_data(struct sock *sk,
{
struct sk_buff *skb, *skb_prev = NULL;
unsigned int maxfraglen, fragheaderlen, mtu, orig_mtu, pmtu;
+ struct ubuf_info *uarg = NULL;
int exthdrlen = 0;
int dst_exthdrlen = 0;
int hh_len;
@@ -1258,7 +1267,7 @@ static int __ip6_append_data(struct sock *sk,
int csummode = CHECKSUM_NONE;
unsigned int maxnonfragsize, headersize;
unsigned int wmem_alloc_delta = 0;
- bool paged;
+ bool paged, extra_uref;
skb = skb_peek_tail(queue);
if (!skb) {
@@ -1323,6 +1332,20 @@ emsgsize:
rt->dst.dev->features & (NETIF_F_IPV6_CSUM | NETIF_F_HW_CSUM))
csummode = CHECKSUM_PARTIAL;
+ if (flags & MSG_ZEROCOPY && length && sock_flag(sk, SOCK_ZEROCOPY)) {
+ uarg = sock_zerocopy_realloc(sk, length, skb_zcopy(skb));
+ if (!uarg)
+ return -ENOBUFS;
+ extra_uref = true;
+ if (rt->dst.dev->features & NETIF_F_SG &&
+ csummode == CHECKSUM_PARTIAL) {
+ paged = true;
+ } else {
+ uarg->zerocopy = 0;
+ skb_zcopy_set(skb, uarg, &extra_uref);
+ }
+ }
+
/*
* Let's try using as much space as possible.
* Use MTU if total length of the message fits into the MTU.
@@ -1441,12 +1464,6 @@ alloc_new_skb:
skb_reserve(skb, hh_len + sizeof(struct frag_hdr) +
dst_exthdrlen);
- /* Only the initial fragment is time stamped */
- skb_shinfo(skb)->tx_flags = cork->tx_flags;
- cork->tx_flags = 0;
- skb_shinfo(skb)->tskey = tskey;
- tskey = 0;
-
/*
* Find where to start putting bytes
*/
@@ -1478,6 +1495,13 @@ alloc_new_skb:
exthdrlen = 0;
dst_exthdrlen = 0;
+ /* Only the initial fragment is time stamped */
+ skb_shinfo(skb)->tx_flags = cork->tx_flags;
+ cork->tx_flags = 0;
+ skb_shinfo(skb)->tskey = tskey;
+ tskey = 0;
+ skb_zcopy_set(skb, uarg, &extra_uref);
+
if ((flags & MSG_CONFIRM) && !skb_prev)
skb_set_dst_pending_confirm(skb, 1);
@@ -1507,7 +1531,7 @@ alloc_new_skb:
err = -EFAULT;
goto error;
}
- } else {
+ } else if (!uarg || !uarg->zerocopy) {
int i = skb_shinfo(skb)->nr_frags;
err = -ENOMEM;
@@ -1537,6 +1561,10 @@ alloc_new_skb:
skb->data_len += copy;
skb->truesize += copy;
wmem_alloc_delta += copy;
+ } else {
+ err = skb_zerocopy_iter_dgram(skb, from, copy);
+ if (err < 0)
+ goto error;
}
offset += copy;
length -= copy;
@@ -1549,6 +1577,8 @@ alloc_new_skb:
error_efault:
err = -EFAULT;
error:
+ if (uarg)
+ sock_zerocopy_put_abort(uarg, extra_uref);
cork->length -= length;
IP6_INC_STATS(sock_net(sk), rt->rt6i_idev, IPSTATS_MIB_OUTDISCARDS);
refcount_add(wmem_alloc_delta, &sk->sk_wmem_alloc);
diff --git a/net/ipv6/ip6_udp_tunnel.c b/net/ipv6/ip6_udp_tunnel.c
index caad40d6e74d..ad1a9ccd4b44 100644
--- a/net/ipv6/ip6_udp_tunnel.c
+++ b/net/ipv6/ip6_udp_tunnel.c
@@ -31,6 +31,22 @@ int udp_sock_create6(struct net *net, struct udp_port_cfg *cfg,
if (err < 0)
goto error;
}
+ if (cfg->bind_ifindex) {
+ struct net_device *dev;
+
+ dev = dev_get_by_index(net, cfg->bind_ifindex);
+ if (!dev) {
+ err = -ENODEV;
+ goto error;
+ }
+
+ err = kernel_setsockopt(sock, SOL_SOCKET, SO_BINDTODEVICE,
+ dev->name, strlen(dev->name) + 1);
+ dev_put(dev);
+
+ if (err < 0)
+ goto error;
+ }
udp6_addr.sin6_family = AF_INET6;
memcpy(&udp6_addr.sin6_addr, &cfg->local_ip6,
diff --git a/net/ipv6/ip6mr.c b/net/ipv6/ip6mr.c
index 377a2ee5d9ad..8276f1224f16 100644
--- a/net/ipv6/ip6mr.c
+++ b/net/ipv6/ip6mr.c
@@ -657,7 +657,7 @@ static struct net_device *ip6mr_reg_vif(struct net *net, struct mr_table *mrt)
return NULL;
}
- if (dev_open(dev))
+ if (dev_open(dev, NULL))
goto failure;
dev_hold(dev);
@@ -1972,7 +1972,7 @@ static inline int ip6mr_forward2_finish(struct net *net, struct sock *sk, struct
*/
static int ip6mr_forward2(struct net *net, struct mr_table *mrt,
- struct sk_buff *skb, struct mfc6_cache *c, int vifi)
+ struct sk_buff *skb, int vifi)
{
struct ipv6hdr *ipv6h;
struct vif_device *vif = &mrt->vif_table[vifi];
@@ -2138,15 +2138,14 @@ forward:
if (psend != -1) {
struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
if (skb2)
- ip6mr_forward2(net, mrt, skb2,
- c, psend);
+ ip6mr_forward2(net, mrt, skb2, psend);
}
psend = ct;
}
}
last_forward:
if (psend != -1) {
- ip6mr_forward2(net, mrt, skb, c, psend);
+ ip6mr_forward2(net, mrt, skb, psend);
return;
}
diff --git a/net/ipv6/ipv6_sockglue.c b/net/ipv6/ipv6_sockglue.c
index 381ce38940ae..973e215c3114 100644
--- a/net/ipv6/ipv6_sockglue.c
+++ b/net/ipv6/ipv6_sockglue.c
@@ -486,7 +486,7 @@ sticky_done:
retv = -EFAULT;
break;
}
- if (sk->sk_bound_dev_if && pkt.ipi6_ifindex != sk->sk_bound_dev_if)
+ if (!sk_dev_equal_l3scope(sk, pkt.ipi6_ifindex))
goto e_inval;
np->sticky_pktinfo.ipi6_ifindex = pkt.ipi6_ifindex;
diff --git a/net/ipv6/netfilter/Makefile b/net/ipv6/netfilter/Makefile
index 200c0c235565..9ea43d5256e0 100644
--- a/net/ipv6/netfilter/Makefile
+++ b/net/ipv6/netfilter/Makefile
@@ -11,7 +11,7 @@ obj-$(CONFIG_IP6_NF_RAW) += ip6table_raw.o
obj-$(CONFIG_IP6_NF_SECURITY) += ip6table_security.o
obj-$(CONFIG_IP6_NF_NAT) += ip6table_nat.o
-nf_nat_ipv6-y := nf_nat_l3proto_ipv6.o nf_nat_proto_icmpv6.o
+nf_nat_ipv6-y := nf_nat_l3proto_ipv6.o
nf_nat_ipv6-$(CONFIG_NF_NAT_MASQUERADE_IPV6) += nf_nat_masquerade_ipv6.o
obj-$(CONFIG_NF_NAT_IPV6) += nf_nat_ipv6.o
diff --git a/net/ipv6/netfilter/nf_nat_l3proto_ipv6.c b/net/ipv6/netfilter/nf_nat_l3proto_ipv6.c
index ca6d38698b1a..23022447eb49 100644
--- a/net/ipv6/netfilter/nf_nat_l3proto_ipv6.c
+++ b/net/ipv6/netfilter/nf_nat_l3proto_ipv6.c
@@ -61,22 +61,8 @@ static void nf_nat_ipv6_decode_session(struct sk_buff *skb,
}
#endif
-static bool nf_nat_ipv6_in_range(const struct nf_conntrack_tuple *t,
- const struct nf_nat_range2 *range)
-{
- return ipv6_addr_cmp(&t->src.u3.in6, &range->min_addr.in6) >= 0 &&
- ipv6_addr_cmp(&t->src.u3.in6, &range->max_addr.in6) <= 0;
-}
-
-static u32 nf_nat_ipv6_secure_port(const struct nf_conntrack_tuple *t,
- __be16 dport)
-{
- return secure_ipv6_port_ephemeral(t->src.u3.ip6, t->dst.u3.ip6, dport);
-}
-
static bool nf_nat_ipv6_manip_pkt(struct sk_buff *skb,
unsigned int iphdroff,
- const struct nf_nat_l4proto *l4proto,
const struct nf_conntrack_tuple *target,
enum nf_nat_manip_type maniptype)
{
@@ -96,8 +82,8 @@ static bool nf_nat_ipv6_manip_pkt(struct sk_buff *skb,
goto manip_addr;
if ((frag_off & htons(~0x7)) == 0 &&
- !l4proto->manip_pkt(skb, &nf_nat_l3proto_ipv6, iphdroff, hdroff,
- target, maniptype))
+ !nf_nat_l4proto_manip_pkt(skb, &nf_nat_l3proto_ipv6, iphdroff, hdroff,
+ target, maniptype))
return false;
/* must reload, offset might have changed */
@@ -171,8 +157,6 @@ static int nf_nat_ipv6_nlattr_to_range(struct nlattr *tb[],
static const struct nf_nat_l3proto nf_nat_l3proto_ipv6 = {
.l3proto = NFPROTO_IPV6,
- .secure_port = nf_nat_ipv6_secure_port,
- .in_range = nf_nat_ipv6_in_range,
.manip_pkt = nf_nat_ipv6_manip_pkt,
.csum_update = nf_nat_ipv6_csum_update,
.csum_recalc = nf_nat_ipv6_csum_recalc,
@@ -196,7 +180,6 @@ int nf_nat_icmpv6_reply_translation(struct sk_buff *skb,
} *inside;
enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo);
enum nf_nat_manip_type manip = HOOK2MANIP(hooknum);
- const struct nf_nat_l4proto *l4proto;
struct nf_conntrack_tuple target;
unsigned long statusbit;
@@ -227,9 +210,8 @@ int nf_nat_icmpv6_reply_translation(struct sk_buff *skb,
if (!(ct->status & statusbit))
return 1;
- l4proto = __nf_nat_l4proto_find(NFPROTO_IPV6, inside->ip6.nexthdr);
if (!nf_nat_ipv6_manip_pkt(skb, hdrlen + sizeof(inside->icmp6),
- l4proto, &ct->tuplehash[!dir].tuple, !manip))
+ &ct->tuplehash[!dir].tuple, !manip))
return 0;
if (skb->ip_summed != CHECKSUM_PARTIAL) {
@@ -244,8 +226,7 @@ int nf_nat_icmpv6_reply_translation(struct sk_buff *skb,
}
nf_ct_invert_tuplepr(&target, &ct->tuplehash[!dir].tuple);
- l4proto = __nf_nat_l4proto_find(NFPROTO_IPV6, IPPROTO_ICMPV6);
- if (!nf_nat_ipv6_manip_pkt(skb, 0, l4proto, &target, manip))
+ if (!nf_nat_ipv6_manip_pkt(skb, 0, &target, manip))
return 0;
return 1;
@@ -415,26 +396,12 @@ EXPORT_SYMBOL_GPL(nf_nat_l3proto_ipv6_unregister_fn);
static int __init nf_nat_l3proto_ipv6_init(void)
{
- int err;
-
- err = nf_nat_l4proto_register(NFPROTO_IPV6, &nf_nat_l4proto_icmpv6);
- if (err < 0)
- goto err1;
- err = nf_nat_l3proto_register(&nf_nat_l3proto_ipv6);
- if (err < 0)
- goto err2;
- return err;
-
-err2:
- nf_nat_l4proto_unregister(NFPROTO_IPV6, &nf_nat_l4proto_icmpv6);
-err1:
- return err;
+ return nf_nat_l3proto_register(&nf_nat_l3proto_ipv6);
}
static void __exit nf_nat_l3proto_ipv6_exit(void)
{
nf_nat_l3proto_unregister(&nf_nat_l3proto_ipv6);
- nf_nat_l4proto_unregister(NFPROTO_IPV6, &nf_nat_l4proto_icmpv6);
}
MODULE_LICENSE("GPL");
diff --git a/net/ipv6/netfilter/nf_nat_proto_icmpv6.c b/net/ipv6/netfilter/nf_nat_proto_icmpv6.c
deleted file mode 100644
index d9bf42ba44fa..000000000000
--- a/net/ipv6/netfilter/nf_nat_proto_icmpv6.c
+++ /dev/null
@@ -1,90 +0,0 @@
-/*
- * Copyright (c) 2011 Patrick Mchardy <kaber@trash.net>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- *
- * Based on Rusty Russell's IPv4 ICMP NAT code. Development of IPv6
- * NAT funded by Astaro.
- */
-
-#include <linux/types.h>
-#include <linux/init.h>
-#include <linux/icmpv6.h>
-
-#include <linux/netfilter.h>
-#include <net/netfilter/nf_nat.h>
-#include <net/netfilter/nf_nat_core.h>
-#include <net/netfilter/nf_nat_l3proto.h>
-#include <net/netfilter/nf_nat_l4proto.h>
-
-static bool
-icmpv6_in_range(const struct nf_conntrack_tuple *tuple,
- enum nf_nat_manip_type maniptype,
- const union nf_conntrack_man_proto *min,
- const union nf_conntrack_man_proto *max)
-{
- return ntohs(tuple->src.u.icmp.id) >= ntohs(min->icmp.id) &&
- ntohs(tuple->src.u.icmp.id) <= ntohs(max->icmp.id);
-}
-
-static void
-icmpv6_unique_tuple(const struct nf_nat_l3proto *l3proto,
- struct nf_conntrack_tuple *tuple,
- const struct nf_nat_range2 *range,
- enum nf_nat_manip_type maniptype,
- const struct nf_conn *ct)
-{
- static u16 id;
- unsigned int range_size;
- unsigned int i;
-
- range_size = ntohs(range->max_proto.icmp.id) -
- ntohs(range->min_proto.icmp.id) + 1;
-
- if (!(range->flags & NF_NAT_RANGE_PROTO_SPECIFIED))
- range_size = 0xffff;
-
- for (i = 0; ; ++id) {
- tuple->src.u.icmp.id = htons(ntohs(range->min_proto.icmp.id) +
- (id % range_size));
- if (++i == range_size || !nf_nat_used_tuple(tuple, ct))
- return;
- }
-}
-
-static bool
-icmpv6_manip_pkt(struct sk_buff *skb,
- const struct nf_nat_l3proto *l3proto,
- unsigned int iphdroff, unsigned int hdroff,
- const struct nf_conntrack_tuple *tuple,
- enum nf_nat_manip_type maniptype)
-{
- struct icmp6hdr *hdr;
-
- if (!skb_make_writable(skb, hdroff + sizeof(*hdr)))
- return false;
-
- hdr = (struct icmp6hdr *)(skb->data + hdroff);
- l3proto->csum_update(skb, iphdroff, &hdr->icmp6_cksum,
- tuple, maniptype);
- if (hdr->icmp6_type == ICMPV6_ECHO_REQUEST ||
- hdr->icmp6_type == ICMPV6_ECHO_REPLY) {
- inet_proto_csum_replace2(&hdr->icmp6_cksum, skb,
- hdr->icmp6_identifier,
- tuple->src.u.icmp.id, false);
- hdr->icmp6_identifier = tuple->src.u.icmp.id;
- }
- return true;
-}
-
-const struct nf_nat_l4proto nf_nat_l4proto_icmpv6 = {
- .l4proto = IPPROTO_ICMPV6,
- .manip_pkt = icmpv6_manip_pkt,
- .in_range = icmpv6_in_range,
- .unique_tuple = icmpv6_unique_tuple,
-#if IS_ENABLED(CONFIG_NF_CT_NETLINK)
- .nlattr_to_range = nf_nat_l4proto_nlattr_to_range,
-#endif
-};
diff --git a/net/ipv6/netfilter/nf_reject_ipv6.c b/net/ipv6/netfilter/nf_reject_ipv6.c
index 24858402e374..b9c8a763c863 100644
--- a/net/ipv6/netfilter/nf_reject_ipv6.c
+++ b/net/ipv6/netfilter/nf_reject_ipv6.c
@@ -131,6 +131,7 @@ EXPORT_SYMBOL_GPL(nf_reject_ip6_tcphdr_put);
void nf_send_reset6(struct net *net, struct sk_buff *oldskb, int hook)
{
+ struct net_device *br_indev __maybe_unused;
struct sk_buff *nskb;
struct tcphdr _otcph;
const struct tcphdr *otcph;
@@ -197,15 +198,18 @@ void nf_send_reset6(struct net *net, struct sk_buff *oldskb, int hook)
* build the eth header using the original destination's MAC as the
* source, and send the RST packet directly.
*/
- if (oldskb->nf_bridge) {
+ br_indev = nf_bridge_get_physindev(oldskb);
+ if (br_indev) {
struct ethhdr *oeth = eth_hdr(oldskb);
- nskb->dev = nf_bridge_get_physindev(oldskb);
+ nskb->dev = br_indev;
nskb->protocol = htons(ETH_P_IPV6);
ip6h->payload_len = htons(sizeof(struct tcphdr));
if (dev_hard_header(nskb, nskb->dev, ntohs(nskb->protocol),
- oeth->h_source, oeth->h_dest, nskb->len) < 0)
+ oeth->h_source, oeth->h_dest, nskb->len) < 0) {
+ kfree_skb(nskb);
return;
+ }
dev_queue_xmit(nskb);
} else
#endif
diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c
index fc2b5e845fdf..5a426226c762 100644
--- a/net/ipv6/raw.c
+++ b/net/ipv6/raw.c
@@ -86,9 +86,8 @@ struct sock *__raw_v6_lookup(struct net *net, struct sock *sk,
!ipv6_addr_equal(&sk->sk_v6_daddr, rmt_addr))
continue;
- if (sk->sk_bound_dev_if &&
- sk->sk_bound_dev_if != dif &&
- sk->sk_bound_dev_if != sdif)
+ if (!raw_sk_bound_dev_eq(net, sk->sk_bound_dev_if,
+ dif, sdif))
continue;
if (!ipv6_addr_any(&sk->sk_v6_rcv_saddr)) {
diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index 059f0531f7c1..194bc162866d 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -2977,7 +2977,8 @@ static struct fib6_info *ip6_route_info_create(struct fib6_config *cfg,
if (!rt)
goto out;
- rt->fib6_metrics = ip_fib_metrics_init(net, cfg->fc_mx, cfg->fc_mx_len);
+ rt->fib6_metrics = ip_fib_metrics_init(net, cfg->fc_mx, cfg->fc_mx_len,
+ extack);
if (IS_ERR(rt->fib6_metrics)) {
err = PTR_ERR(rt->fib6_metrics);
/* Do not leave garbage there. */
@@ -3710,7 +3711,7 @@ struct fib6_info *addrconf_f6i_alloc(struct net *net,
if (!f6i)
return ERR_PTR(-ENOMEM);
- f6i->fib6_metrics = ip_fib_metrics_init(net, NULL, 0);
+ f6i->fib6_metrics = ip_fib_metrics_init(net, NULL, 0, NULL);
f6i->dst_nocount = true;
f6i->dst_host = true;
f6i->fib6_protocol = RTPROT_KERNEL;
diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
index 03e6b7a2bc53..b81eb7cb815e 100644
--- a/net/ipv6/tcp_ipv6.c
+++ b/net/ipv6/tcp_ipv6.c
@@ -349,7 +349,7 @@ static void tcp_v6_mtu_reduced(struct sock *sk)
}
}
-static void tcp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
+static int tcp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
u8 type, u8 code, int offset, __be32 info)
{
const struct ipv6hdr *hdr = (const struct ipv6hdr *)skb->data;
@@ -371,17 +371,19 @@ static void tcp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
if (!sk) {
__ICMP6_INC_STATS(net, __in6_dev_get(skb->dev),
ICMP6_MIB_INERRORS);
- return;
+ return -ENOENT;
}
if (sk->sk_state == TCP_TIME_WAIT) {
inet_twsk_put(inet_twsk(sk));
- return;
+ return 0;
}
seq = ntohl(th->seq);
fatal = icmpv6_err_convert(type, code, &err);
- if (sk->sk_state == TCP_NEW_SYN_RECV)
- return tcp_req_err(sk, seq, fatal);
+ if (sk->sk_state == TCP_NEW_SYN_RECV) {
+ tcp_req_err(sk, seq, fatal);
+ return 0;
+ }
bh_lock_sock(sk);
if (sock_owned_by_user(sk) && type != ICMPV6_PKT_TOOBIG)
@@ -467,6 +469,7 @@ static void tcp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
out:
bh_unlock_sock(sk);
sock_put(sk);
+ return 0;
}
@@ -734,6 +737,7 @@ static void tcp_v6_init_req(struct request_sock *req,
const struct sock *sk_listener,
struct sk_buff *skb)
{
+ bool l3_slave = ipv6_l3mdev_skb(TCP_SKB_CB(skb)->header.h6.flags);
struct inet_request_sock *ireq = inet_rsk(req);
const struct ipv6_pinfo *np = inet6_sk(sk_listener);
@@ -741,7 +745,7 @@ static void tcp_v6_init_req(struct request_sock *req,
ireq->ir_v6_loc_addr = ipv6_hdr(skb)->daddr;
/* So that link locals have meaning */
- if (!sk_listener->sk_bound_dev_if &&
+ if ((!sk_listener->sk_bound_dev_if || l3_slave) &&
ipv6_addr_type(&ireq->ir_v6_rmt_addr) & IPV6_ADDR_LINKLOCAL)
ireq->ir_iif = tcp_v6_iif(skb);
diff --git a/net/ipv6/tcpv6_offload.c b/net/ipv6/tcpv6_offload.c
index e72947c99454..3179c425d7ff 100644
--- a/net/ipv6/tcpv6_offload.c
+++ b/net/ipv6/tcpv6_offload.c
@@ -9,14 +9,15 @@
*
* TCPv6 GSO/GRO support
*/
+#include <linux/indirect_call_wrapper.h>
#include <linux/skbuff.h>
#include <net/protocol.h>
#include <net/tcp.h>
#include <net/ip6_checksum.h>
#include "ip6_offload.h"
-static struct sk_buff *tcp6_gro_receive(struct list_head *head,
- struct sk_buff *skb)
+INDIRECT_CALLABLE_SCOPE
+struct sk_buff *tcp6_gro_receive(struct list_head *head, struct sk_buff *skb)
{
/* Don't bother verifying checksum if we're going to flush anyway. */
if (!NAPI_GRO_CB(skb)->flush &&
@@ -29,7 +30,7 @@ static struct sk_buff *tcp6_gro_receive(struct list_head *head,
return tcp_gro_receive(head, skb);
}
-static int tcp6_gro_complete(struct sk_buff *skb, int thoff)
+INDIRECT_CALLABLE_SCOPE int tcp6_gro_complete(struct sk_buff *skb, int thoff)
{
const struct ipv6hdr *iph = ipv6_hdr(skb);
struct tcphdr *th = tcp_hdr(skb);
diff --git a/net/ipv6/tunnel6.c b/net/ipv6/tunnel6.c
index dae25cad05cd..1991dede7367 100644
--- a/net/ipv6/tunnel6.c
+++ b/net/ipv6/tunnel6.c
@@ -134,24 +134,28 @@ drop:
return 0;
}
-static void tunnel6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
+static int tunnel6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
u8 type, u8 code, int offset, __be32 info)
{
struct xfrm6_tunnel *handler;
for_each_tunnel_rcu(tunnel6_handlers, handler)
if (!handler->err_handler(skb, opt, type, code, offset, info))
- break;
+ return 0;
+
+ return -ENOENT;
}
-static void tunnel46_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
+static int tunnel46_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
u8 type, u8 code, int offset, __be32 info)
{
struct xfrm6_tunnel *handler;
for_each_tunnel_rcu(tunnel46_handlers, handler)
if (!handler->err_handler(skb, opt, type, code, offset, info))
- break;
+ return 0;
+
+ return -ENOENT;
}
static const struct inet6_protocol tunnel6_protocol = {
diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c
index d2d97d07ef27..9cbf363172bd 100644
--- a/net/ipv6/udp.c
+++ b/net/ipv6/udp.c
@@ -45,6 +45,7 @@
#include <net/raw.h>
#include <net/tcp_states.h>
#include <net/ip6_checksum.h>
+#include <net/ip6_tunnel.h>
#include <net/xfrm.h>
#include <net/inet_hashtables.h>
#include <net/inet6_hashtables.h>
@@ -117,12 +118,16 @@ static int compute_score(struct sock *sk, struct net *net,
{
int score;
struct inet_sock *inet;
+ bool dev_match;
if (!net_eq(sock_net(sk), net) ||
udp_sk(sk)->udp_port_hash != hnum ||
sk->sk_family != PF_INET6)
return -1;
+ if (!ipv6_addr_equal(&sk->sk_v6_rcv_saddr, daddr))
+ return -1;
+
score = 0;
inet = inet_sk(sk);
@@ -132,27 +137,16 @@ static int compute_score(struct sock *sk, struct net *net,
score++;
}
- if (!ipv6_addr_any(&sk->sk_v6_rcv_saddr)) {
- if (!ipv6_addr_equal(&sk->sk_v6_rcv_saddr, daddr))
- return -1;
- score++;
- }
-
if (!ipv6_addr_any(&sk->sk_v6_daddr)) {
if (!ipv6_addr_equal(&sk->sk_v6_daddr, saddr))
return -1;
score++;
}
- if (sk->sk_bound_dev_if || exact_dif) {
- bool dev_match = (sk->sk_bound_dev_if == dif ||
- sk->sk_bound_dev_if == sdif);
-
- if (!dev_match)
- return -1;
- if (sk->sk_bound_dev_if)
- score++;
- }
+ dev_match = udp_sk_bound_dev_eq(net, sk->sk_bound_dev_if, dif, sdif);
+ if (!dev_match)
+ return -1;
+ score++;
if (sk->sk_incoming_cpu == raw_smp_processor_id())
score++;
@@ -200,66 +194,32 @@ struct sock *__udp6_lib_lookup(struct net *net,
int dif, int sdif, struct udp_table *udptable,
struct sk_buff *skb)
{
- struct sock *sk, *result;
unsigned short hnum = ntohs(dport);
- unsigned int hash2, slot2, slot = udp_hashfn(net, hnum, udptable->mask);
- struct udp_hslot *hslot2, *hslot = &udptable->hash[slot];
+ unsigned int hash2, slot2;
+ struct udp_hslot *hslot2;
+ struct sock *result;
bool exact_dif = udp6_lib_exact_dif_match(net, skb);
- int score, badness;
- u32 hash = 0;
- if (hslot->count > 10) {
- hash2 = ipv6_portaddr_hash(net, daddr, hnum);
+ hash2 = ipv6_portaddr_hash(net, daddr, hnum);
+ slot2 = hash2 & udptable->mask;
+ hslot2 = &udptable->hash2[slot2];
+
+ result = udp6_lib_lookup2(net, saddr, sport,
+ daddr, hnum, dif, sdif, exact_dif,
+ hslot2, skb);
+ if (!result) {
+ hash2 = ipv6_portaddr_hash(net, &in6addr_any, hnum);
slot2 = hash2 & udptable->mask;
+
hslot2 = &udptable->hash2[slot2];
- if (hslot->count < hslot2->count)
- goto begin;
result = udp6_lib_lookup2(net, saddr, sport,
- daddr, hnum, dif, sdif, exact_dif,
- hslot2, skb);
- if (!result) {
- unsigned int old_slot2 = slot2;
- hash2 = ipv6_portaddr_hash(net, &in6addr_any, hnum);
- slot2 = hash2 & udptable->mask;
- /* avoid searching the same slot again. */
- if (unlikely(slot2 == old_slot2))
- return result;
-
- hslot2 = &udptable->hash2[slot2];
- if (hslot->count < hslot2->count)
- goto begin;
-
- result = udp6_lib_lookup2(net, saddr, sport,
- daddr, hnum, dif, sdif,
- exact_dif, hslot2,
- skb);
- }
- if (unlikely(IS_ERR(result)))
- return NULL;
- return result;
- }
-begin:
- result = NULL;
- badness = -1;
- sk_for_each_rcu(sk, &hslot->head) {
- score = compute_score(sk, net, saddr, sport, daddr, hnum, dif,
- sdif, exact_dif);
- if (score > badness) {
- if (sk->sk_reuseport) {
- hash = udp6_ehashfn(net, daddr, hnum,
- saddr, sport);
- result = reuseport_select_sock(sk, hash, skb,
- sizeof(struct udphdr));
- if (unlikely(IS_ERR(result)))
- return NULL;
- if (result)
- return result;
- }
- result = sk;
- badness = score;
- }
+ &in6addr_any, hnum, dif, sdif,
+ exact_dif, hslot2,
+ skb);
}
+ if (unlikely(IS_ERR(result)))
+ return NULL;
return result;
}
EXPORT_SYMBOL_GPL(__udp6_lib_lookup);
@@ -329,6 +289,7 @@ int udpv6_recvmsg(struct sock *sk, struct msghdr *msg, size_t len,
int err;
int is_udplite = IS_UDPLITE(sk);
bool checksum_valid = false;
+ struct udp_mib *mib;
int is_udp4;
if (flags & MSG_ERRQUEUE)
@@ -352,6 +313,7 @@ try_again:
msg->msg_flags |= MSG_TRUNC;
is_udp4 = (skb->protocol == htons(ETH_P_IP));
+ mib = __UDPX_MIB(sk, is_udp4);
/*
* If checksum is needed at all, try to do it while copying the
@@ -380,24 +342,13 @@ try_again:
if (unlikely(err)) {
if (!peeked) {
atomic_inc(&sk->sk_drops);
- if (is_udp4)
- UDP_INC_STATS(sock_net(sk), UDP_MIB_INERRORS,
- is_udplite);
- else
- UDP6_INC_STATS(sock_net(sk), UDP_MIB_INERRORS,
- is_udplite);
+ SNMP_INC_STATS(mib, UDP_MIB_INERRORS);
}
kfree_skb(skb);
return err;
}
- if (!peeked) {
- if (is_udp4)
- UDP_INC_STATS(sock_net(sk), UDP_MIB_INDATAGRAMS,
- is_udplite);
- else
- UDP6_INC_STATS(sock_net(sk), UDP_MIB_INDATAGRAMS,
- is_udplite);
- }
+ if (!peeked)
+ SNMP_INC_STATS(mib, UDP_MIB_INDATAGRAMS);
sock_recv_ts_and_drops(msg, sk, skb);
@@ -421,6 +372,9 @@ try_again:
*addr_len = sizeof(*sin6);
}
+ if (udp_sk(sk)->gro_enabled)
+ udp_cmsg_recv(msg, sk, skb);
+
if (np->rxopt.all)
ip6_datagram_recv_common_ctl(sk, msg, skb);
@@ -443,17 +397,8 @@ try_again:
csum_copy_err:
if (!__sk_queue_drop_skb(sk, &udp_sk(sk)->reader_queue, skb, flags,
udp_skb_destructor)) {
- if (is_udp4) {
- UDP_INC_STATS(sock_net(sk),
- UDP_MIB_CSUMERRORS, is_udplite);
- UDP_INC_STATS(sock_net(sk),
- UDP_MIB_INERRORS, is_udplite);
- } else {
- UDP6_INC_STATS(sock_net(sk),
- UDP_MIB_CSUMERRORS, is_udplite);
- UDP6_INC_STATS(sock_net(sk),
- UDP_MIB_INERRORS, is_udplite);
- }
+ SNMP_INC_STATS(mib, UDP_MIB_CSUMERRORS);
+ SNMP_INC_STATS(mib, UDP_MIB_INERRORS);
}
kfree_skb(skb);
@@ -463,15 +408,106 @@ csum_copy_err:
goto try_again;
}
-void __udp6_lib_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
- u8 type, u8 code, int offset, __be32 info,
- struct udp_table *udptable)
+DEFINE_STATIC_KEY_FALSE(udpv6_encap_needed_key);
+void udpv6_encap_enable(void)
+{
+ static_branch_inc(&udpv6_encap_needed_key);
+}
+EXPORT_SYMBOL(udpv6_encap_enable);
+
+/* Handler for tunnels with arbitrary destination ports: no socket lookup, go
+ * through error handlers in encapsulations looking for a match.
+ */
+static int __udp6_lib_err_encap_no_sk(struct sk_buff *skb,
+ struct inet6_skb_parm *opt,
+ u8 type, u8 code, int offset, u32 info)
+{
+ int i;
+
+ for (i = 0; i < MAX_IPTUN_ENCAP_OPS; i++) {
+ int (*handler)(struct sk_buff *skb, struct inet6_skb_parm *opt,
+ u8 type, u8 code, int offset, u32 info);
+
+ if (!ip6tun_encaps[i])
+ continue;
+ handler = rcu_dereference(ip6tun_encaps[i]->err_handler);
+ if (handler && !handler(skb, opt, type, code, offset, info))
+ return 0;
+ }
+
+ return -ENOENT;
+}
+
+/* Try to match ICMP errors to UDP tunnels by looking up a socket without
+ * reversing source and destination port: this will match tunnels that force the
+ * same destination port on both endpoints (e.g. VXLAN, GENEVE). Note that
+ * lwtunnels might actually break this assumption by being configured with
+ * different destination ports on endpoints, in this case we won't be able to
+ * trace ICMP messages back to them.
+ *
+ * If this doesn't match any socket, probe tunnels with arbitrary destination
+ * ports (e.g. FoU, GUE): there, the receiving socket is useless, as the port
+ * we've sent packets to won't necessarily match the local destination port.
+ *
+ * Then ask the tunnel implementation to match the error against a valid
+ * association.
+ *
+ * Return an error if we can't find a match, the socket if we need further
+ * processing, zero otherwise.
+ */
+static struct sock *__udp6_lib_err_encap(struct net *net,
+ const struct ipv6hdr *hdr, int offset,
+ struct udphdr *uh,
+ struct udp_table *udptable,
+ struct sk_buff *skb,
+ struct inet6_skb_parm *opt,
+ u8 type, u8 code, __be32 info)
+{
+ int network_offset, transport_offset;
+ struct sock *sk;
+
+ network_offset = skb_network_offset(skb);
+ transport_offset = skb_transport_offset(skb);
+
+ /* Network header needs to point to the outer IPv6 header inside ICMP */
+ skb_reset_network_header(skb);
+
+ /* Transport header needs to point to the UDP header */
+ skb_set_transport_header(skb, offset);
+
+ sk = __udp6_lib_lookup(net, &hdr->daddr, uh->source,
+ &hdr->saddr, uh->dest,
+ inet6_iif(skb), 0, udptable, skb);
+ if (sk) {
+ int (*lookup)(struct sock *sk, struct sk_buff *skb);
+ struct udp_sock *up = udp_sk(sk);
+
+ lookup = READ_ONCE(up->encap_err_lookup);
+ if (!lookup || lookup(sk, skb))
+ sk = NULL;
+ }
+
+ if (!sk) {
+ sk = ERR_PTR(__udp6_lib_err_encap_no_sk(skb, opt, type, code,
+ offset, info));
+ }
+
+ skb_set_transport_header(skb, transport_offset);
+ skb_set_network_header(skb, network_offset);
+
+ return sk;
+}
+
+int __udp6_lib_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
+ u8 type, u8 code, int offset, __be32 info,
+ struct udp_table *udptable)
{
struct ipv6_pinfo *np;
const struct ipv6hdr *hdr = (const struct ipv6hdr *)skb->data;
const struct in6_addr *saddr = &hdr->saddr;
const struct in6_addr *daddr = &hdr->daddr;
struct udphdr *uh = (struct udphdr *)(skb->data+offset);
+ bool tunnel = false;
struct sock *sk;
int harderr;
int err;
@@ -480,9 +516,23 @@ void __udp6_lib_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
sk = __udp6_lib_lookup(net, daddr, uh->dest, saddr, uh->source,
inet6_iif(skb), inet6_sdif(skb), udptable, skb);
if (!sk) {
- __ICMP6_INC_STATS(net, __in6_dev_get(skb->dev),
- ICMP6_MIB_INERRORS);
- return;
+ /* No socket for error: try tunnels before discarding */
+ sk = ERR_PTR(-ENOENT);
+ if (static_branch_unlikely(&udpv6_encap_needed_key)) {
+ sk = __udp6_lib_err_encap(net, hdr, offset, uh,
+ udptable, skb,
+ opt, type, code, info);
+ if (!sk)
+ return 0;
+ }
+
+ if (IS_ERR(sk)) {
+ __ICMP6_INC_STATS(net, __in6_dev_get(skb->dev),
+ ICMP6_MIB_INERRORS);
+ return PTR_ERR(sk);
+ }
+
+ tunnel = true;
}
harderr = icmpv6_err_convert(type, code, &err);
@@ -496,10 +546,19 @@ void __udp6_lib_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
harderr = 1;
}
if (type == NDISC_REDIRECT) {
- ip6_sk_redirect(skb, sk);
+ if (tunnel) {
+ ip6_redirect(skb, sock_net(sk), inet6_iif(skb),
+ sk->sk_mark, sk->sk_uid);
+ } else {
+ ip6_sk_redirect(skb, sk);
+ }
goto out;
}
+ /* Tunnels don't have an application socket: don't pass errors back */
+ if (tunnel)
+ goto out;
+
if (!np->recverr) {
if (!harderr || sk->sk_state != TCP_ESTABLISHED)
goto out;
@@ -510,7 +569,7 @@ void __udp6_lib_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
sk->sk_err = err;
sk->sk_error_report(sk);
out:
- return;
+ return 0;
}
static int __udpv6_queue_rcv_skb(struct sock *sk, struct sk_buff *skb)
@@ -541,21 +600,14 @@ static int __udpv6_queue_rcv_skb(struct sock *sk, struct sk_buff *skb)
return 0;
}
-static __inline__ void udpv6_err(struct sk_buff *skb,
- struct inet6_skb_parm *opt, u8 type,
- u8 code, int offset, __be32 info)
+static __inline__ int udpv6_err(struct sk_buff *skb,
+ struct inet6_skb_parm *opt, u8 type,
+ u8 code, int offset, __be32 info)
{
- __udp6_lib_err(skb, opt, type, code, offset, info, &udp_table);
+ return __udp6_lib_err(skb, opt, type, code, offset, info, &udp_table);
}
-DEFINE_STATIC_KEY_FALSE(udpv6_encap_needed_key);
-void udpv6_encap_enable(void)
-{
- static_branch_enable(&udpv6_encap_needed_key);
-}
-EXPORT_SYMBOL(udpv6_encap_enable);
-
-static int udpv6_queue_rcv_skb(struct sock *sk, struct sk_buff *skb)
+static int udpv6_queue_rcv_one_skb(struct sock *sk, struct sk_buff *skb)
{
struct udp_sock *up = udp_sk(sk);
int is_udplite = IS_UDPLITE(sk);
@@ -638,10 +690,32 @@ drop:
return -1;
}
+static int udpv6_queue_rcv_skb(struct sock *sk, struct sk_buff *skb)
+{
+ struct sk_buff *next, *segs;
+ int ret;
+
+ if (likely(!udp_unexpected_gso(sk, skb)))
+ return udpv6_queue_rcv_one_skb(sk, skb);
+
+ __skb_push(skb, -skb_mac_offset(skb));
+ segs = udp_rcv_segment(sk, skb, false);
+ for (skb = segs; skb; skb = next) {
+ next = skb->next;
+ __skb_pull(skb, skb_transport_offset(skb));
+
+ ret = udpv6_queue_rcv_one_skb(sk, skb);
+ if (ret > 0)
+ ip6_protocol_deliver_rcu(dev_net(skb->dev), skb, ret,
+ true);
+ }
+ return 0;
+}
+
static bool __udp_v6_is_mcast_sock(struct net *net, struct sock *sk,
__be16 loc_port, const struct in6_addr *loc_addr,
__be16 rmt_port, const struct in6_addr *rmt_addr,
- int dif, unsigned short hnum)
+ int dif, int sdif, unsigned short hnum)
{
struct inet_sock *inet = inet_sk(sk);
@@ -653,7 +727,7 @@ static bool __udp_v6_is_mcast_sock(struct net *net, struct sock *sk,
(inet->inet_dport && inet->inet_dport != rmt_port) ||
(!ipv6_addr_any(&sk->sk_v6_daddr) &&
!ipv6_addr_equal(&sk->sk_v6_daddr, rmt_addr)) ||
- (sk->sk_bound_dev_if && sk->sk_bound_dev_if != dif) ||
+ !udp_sk_bound_dev_eq(net, sk->sk_bound_dev_if, dif, sdif) ||
(!ipv6_addr_any(&sk->sk_v6_rcv_saddr) &&
!ipv6_addr_equal(&sk->sk_v6_rcv_saddr, loc_addr)))
return false;
@@ -687,6 +761,7 @@ static int __udp6_lib_mcast_deliver(struct net *net, struct sk_buff *skb,
unsigned int offset = offsetof(typeof(*sk), sk_node);
unsigned int hash2 = 0, hash2_any = 0, use_hash2 = (hslot->count > 10);
int dif = inet6_iif(skb);
+ int sdif = inet6_sdif(skb);
struct hlist_node *node;
struct sk_buff *nskb;
@@ -701,7 +776,8 @@ start_lookup:
sk_for_each_entry_offset_rcu(sk, node, &hslot->head, offset) {
if (!__udp_v6_is_mcast_sock(net, sk, uh->dest, daddr,
- uh->source, saddr, dif, hnum))
+ uh->source, saddr, dif, sdif,
+ hnum))
continue;
/* If zero checksum and no_check is not on for
* the socket then skip it.
@@ -1458,11 +1534,15 @@ void udpv6_destroy_sock(struct sock *sk)
udp_v6_flush_pending_frames(sk);
release_sock(sk);
- if (static_branch_unlikely(&udpv6_encap_needed_key) && up->encap_type) {
- void (*encap_destroy)(struct sock *sk);
- encap_destroy = READ_ONCE(up->encap_destroy);
- if (encap_destroy)
- encap_destroy(sk);
+ if (static_branch_unlikely(&udpv6_encap_needed_key)) {
+ if (up->encap_type) {
+ void (*encap_destroy)(struct sock *sk);
+ encap_destroy = READ_ONCE(up->encap_destroy);
+ if (encap_destroy)
+ encap_destroy(sk);
+ }
+ if (up->encap_enabled)
+ static_branch_dec(&udpv6_encap_needed_key);
}
inet6_destroy_sock(sk);
diff --git a/net/ipv6/udp_impl.h b/net/ipv6/udp_impl.h
index 7903e21c178b..5730e6503cb4 100644
--- a/net/ipv6/udp_impl.h
+++ b/net/ipv6/udp_impl.h
@@ -9,8 +9,8 @@
#include <net/transp_v6.h>
int __udp6_lib_rcv(struct sk_buff *, struct udp_table *, int);
-void __udp6_lib_err(struct sk_buff *, struct inet6_skb_parm *, u8, u8, int,
- __be32, struct udp_table *);
+int __udp6_lib_err(struct sk_buff *, struct inet6_skb_parm *, u8, u8, int,
+ __be32, struct udp_table *);
int udp_v6_get_port(struct sock *sk, unsigned short snum);
diff --git a/net/ipv6/udp_offload.c b/net/ipv6/udp_offload.c
index 1b8e161ac527..83b11d0ac091 100644
--- a/net/ipv6/udp_offload.c
+++ b/net/ipv6/udp_offload.c
@@ -11,6 +11,7 @@
*/
#include <linux/skbuff.h>
#include <linux/netdevice.h>
+#include <linux/indirect_call_wrapper.h>
#include <net/protocol.h>
#include <net/ipv6.h>
#include <net/udp.h>
@@ -114,8 +115,8 @@ out:
return segs;
}
-static struct sk_buff *udp6_gro_receive(struct list_head *head,
- struct sk_buff *skb)
+INDIRECT_CALLABLE_SCOPE
+struct sk_buff *udp6_gro_receive(struct list_head *head, struct sk_buff *skb)
{
struct udphdr *uh = udp_gro_udphdr(skb);
@@ -142,18 +143,14 @@ flush:
return NULL;
}
-static int udp6_gro_complete(struct sk_buff *skb, int nhoff)
+INDIRECT_CALLABLE_SCOPE int udp6_gro_complete(struct sk_buff *skb, int nhoff)
{
const struct ipv6hdr *ipv6h = ipv6_hdr(skb);
struct udphdr *uh = (struct udphdr *)(skb->data + nhoff);
- if (uh->check) {
- skb_shinfo(skb)->gso_type |= SKB_GSO_UDP_TUNNEL_CSUM;
+ if (uh->check)
uh->check = ~udp_v6_check(skb->len - nhoff, &ipv6h->saddr,
&ipv6h->daddr, 0);
- } else {
- skb_shinfo(skb)->gso_type |= SKB_GSO_UDP_TUNNEL;
- }
return udp_gro_complete(skb, nhoff, udp6_lib_lookup_skb);
}
diff --git a/net/ipv6/udplite.c b/net/ipv6/udplite.c
index 5000ad6878e6..a125aebc29e5 100644
--- a/net/ipv6/udplite.c
+++ b/net/ipv6/udplite.c
@@ -20,11 +20,12 @@ static int udplitev6_rcv(struct sk_buff *skb)
return __udp6_lib_rcv(skb, &udplite_table, IPPROTO_UDPLITE);
}
-static void udplitev6_err(struct sk_buff *skb,
+static int udplitev6_err(struct sk_buff *skb,
struct inet6_skb_parm *opt,
u8 type, u8 code, int offset, __be32 info)
{
- __udp6_lib_err(skb, opt, type, code, offset, info, &udplite_table);
+ return __udp6_lib_err(skb, opt, type, code, offset, info,
+ &udplite_table);
}
static const struct inet6_protocol udplitev6_protocol = {
diff --git a/net/ipv6/xfrm6_input.c b/net/ipv6/xfrm6_input.c
index 9ef490dddcea..a52cb3fc6df5 100644
--- a/net/ipv6/xfrm6_input.c
+++ b/net/ipv6/xfrm6_input.c
@@ -86,14 +86,16 @@ int xfrm6_input_addr(struct sk_buff *skb, xfrm_address_t *daddr,
{
struct net *net = dev_net(skb->dev);
struct xfrm_state *x = NULL;
+ struct sec_path *sp;
int i = 0;
- if (secpath_set(skb)) {
+ sp = secpath_set(skb);
+ if (!sp) {
XFRM_INC_STATS(net, LINUX_MIB_XFRMINERROR);
goto drop;
}
- if (1 + skb->sp->len == XFRM_MAX_DEPTH) {
+ if (1 + sp->len == XFRM_MAX_DEPTH) {
XFRM_INC_STATS(net, LINUX_MIB_XFRMINBUFFERERROR);
goto drop;
}
@@ -145,7 +147,7 @@ int xfrm6_input_addr(struct sk_buff *skb, xfrm_address_t *daddr,
goto drop;
}
- skb->sp->xvec[skb->sp->len++] = x;
+ sp->xvec[sp->len++] = x;
spin_lock(&x->lock);
diff --git a/net/ipv6/xfrm6_policy.c b/net/ipv6/xfrm6_policy.c
index d35bcf92969c..769f8f78d3b8 100644
--- a/net/ipv6/xfrm6_policy.c
+++ b/net/ipv6/xfrm6_policy.c
@@ -262,7 +262,6 @@ static void xfrm6_dst_ifdown(struct dst_entry *dst, struct net_device *dev,
if (xdst->u.rt6.rt6i_idev->dev == dev) {
struct inet6_dev *loopback_idev =
in6_dev_get(dev_net(dev)->loopback_dev);
- BUG_ON(!loopback_idev);
do {
in6_dev_put(xdst->u.rt6.rt6i_idev);
diff --git a/net/ipv6/xfrm6_protocol.c b/net/ipv6/xfrm6_protocol.c
index b2dc8ce49378..cc979b702c89 100644
--- a/net/ipv6/xfrm6_protocol.c
+++ b/net/ipv6/xfrm6_protocol.c
@@ -80,14 +80,16 @@ static int xfrm6_esp_rcv(struct sk_buff *skb)
return 0;
}
-static void xfrm6_esp_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
+static int xfrm6_esp_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
u8 type, u8 code, int offset, __be32 info)
{
struct xfrm6_protocol *handler;
for_each_protocol_rcu(esp6_handlers, handler)
if (!handler->err_handler(skb, opt, type, code, offset, info))
- break;
+ return 0;
+
+ return -ENOENT;
}
static int xfrm6_ah_rcv(struct sk_buff *skb)
@@ -107,14 +109,16 @@ static int xfrm6_ah_rcv(struct sk_buff *skb)
return 0;
}
-static void xfrm6_ah_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
+static int xfrm6_ah_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
u8 type, u8 code, int offset, __be32 info)
{
struct xfrm6_protocol *handler;
for_each_protocol_rcu(ah6_handlers, handler)
if (!handler->err_handler(skb, opt, type, code, offset, info))
- break;
+ return 0;
+
+ return -ENOENT;
}
static int xfrm6_ipcomp_rcv(struct sk_buff *skb)
@@ -134,14 +138,16 @@ static int xfrm6_ipcomp_rcv(struct sk_buff *skb)
return 0;
}
-static void xfrm6_ipcomp_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
+static int xfrm6_ipcomp_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
u8 type, u8 code, int offset, __be32 info)
{
struct xfrm6_protocol *handler;
for_each_protocol_rcu(ipcomp6_handlers, handler)
if (!handler->err_handler(skb, opt, type, code, offset, info))
- break;
+ return 0;
+
+ return -ENOENT;
}
static const struct inet6_protocol esp6_protocol = {
diff --git a/net/ipv6/xfrm6_tunnel.c b/net/ipv6/xfrm6_tunnel.c
index 4a46df8441c9..f5b4febeaa25 100644
--- a/net/ipv6/xfrm6_tunnel.c
+++ b/net/ipv6/xfrm6_tunnel.c
@@ -144,6 +144,9 @@ static u32 __xfrm6_tunnel_alloc_spi(struct net *net, xfrm_address_t *saddr)
index = __xfrm6_tunnel_spi_check(net, spi);
if (index >= 0)
goto alloc_spi;
+
+ if (spi == XFRM6_TUNNEL_SPI_MAX)
+ break;
}
for (spi = XFRM6_TUNNEL_SPI_MIN; spi < xfrm6_tn->spi; spi++) {
index = __xfrm6_tunnel_spi_check(net, spi);
diff --git a/net/iucv/af_iucv.c b/net/iucv/af_iucv.c
index 0bed4cc20603..78ea5a739d10 100644
--- a/net/iucv/af_iucv.c
+++ b/net/iucv/af_iucv.c
@@ -1873,30 +1873,26 @@ static void iucv_callback_txdone(struct iucv_path *path,
struct sock *sk = path->private;
struct sk_buff *this = NULL;
struct sk_buff_head *list = &iucv_sk(sk)->send_skb_q;
- struct sk_buff *list_skb = list->next;
+ struct sk_buff *list_skb;
unsigned long flags;
bh_lock_sock(sk);
- if (!skb_queue_empty(list)) {
- spin_lock_irqsave(&list->lock, flags);
- while (list_skb != (struct sk_buff *)list) {
- if (msg->tag == IUCV_SKB_CB(list_skb)->tag) {
- this = list_skb;
- break;
- }
- list_skb = list_skb->next;
+ spin_lock_irqsave(&list->lock, flags);
+ skb_queue_walk(list, list_skb) {
+ if (msg->tag == IUCV_SKB_CB(list_skb)->tag) {
+ this = list_skb;
+ break;
}
- if (this)
- __skb_unlink(this, list);
-
- spin_unlock_irqrestore(&list->lock, flags);
+ }
+ if (this)
+ __skb_unlink(this, list);
+ spin_unlock_irqrestore(&list->lock, flags);
- if (this) {
- kfree_skb(this);
- /* wake up any process waiting for sending */
- iucv_sock_wake_msglim(sk);
- }
+ if (this) {
+ kfree_skb(this);
+ /* wake up any process waiting for sending */
+ iucv_sock_wake_msglim(sk);
}
if (sk->sk_state == IUCV_CLOSING) {
@@ -2284,11 +2280,7 @@ static void afiucv_hs_callback_txnotify(struct sk_buff *skb,
list = &iucv->send_skb_q;
spin_lock_irqsave(&list->lock, flags);
- if (skb_queue_empty(list))
- goto out_unlock;
- list_skb = list->next;
- nskb = list_skb->next;
- while (list_skb != (struct sk_buff *)list) {
+ skb_queue_walk_safe(list, list_skb, nskb) {
if (skb_shinfo(list_skb) == skb_shinfo(skb)) {
switch (n) {
case TX_NOTIFY_OK:
@@ -2321,10 +2313,7 @@ static void afiucv_hs_callback_txnotify(struct sk_buff *skb,
}
break;
}
- list_skb = nskb;
- nskb = nskb->next;
}
-out_unlock:
spin_unlock_irqrestore(&list->lock, flags);
if (sk->sk_state == IUCV_CLOSING) {
diff --git a/net/key/af_key.c b/net/key/af_key.c
index 9d61266526e7..655c787f9d54 100644
--- a/net/key/af_key.c
+++ b/net/key/af_key.c
@@ -2020,7 +2020,7 @@ parse_ipsecrequests(struct xfrm_policy *xp, struct sadb_x_policy *pol)
static inline int pfkey_xfrm_policy2sec_ctx_size(const struct xfrm_policy *xp)
{
- struct xfrm_sec_ctx *xfrm_ctx = xp->security;
+ struct xfrm_sec_ctx *xfrm_ctx = xp->security;
if (xfrm_ctx) {
int len = sizeof(struct sadb_x_sec_ctx);
diff --git a/net/l3mdev/l3mdev.c b/net/l3mdev/l3mdev.c
index 8da86ceca33d..309dee76724e 100644
--- a/net/l3mdev/l3mdev.c
+++ b/net/l3mdev/l3mdev.c
@@ -47,6 +47,24 @@ int l3mdev_master_ifindex_rcu(const struct net_device *dev)
EXPORT_SYMBOL_GPL(l3mdev_master_ifindex_rcu);
/**
+ * l3mdev_master_upper_ifindex_by_index - get index of upper l3 master
+ * device
+ * @net: network namespace for device index lookup
+ * @ifindex: targeted interface
+ */
+int l3mdev_master_upper_ifindex_by_index_rcu(struct net *net, int ifindex)
+{
+ struct net_device *dev;
+
+ dev = dev_get_by_index_rcu(net, ifindex);
+ while (dev && !netif_is_l3_master(dev))
+ dev = netdev_master_upper_dev_get(dev);
+
+ return dev ? dev->ifindex : 0;
+}
+EXPORT_SYMBOL_GPL(l3mdev_master_upper_ifindex_by_index_rcu);
+
+/**
* l3mdev_fib_table - get FIB table id associated with an L3
* master interface
* @dev: targeted interface
diff --git a/net/mac80211/Kconfig b/net/mac80211/Kconfig
index f869e35d0974..be471fe95048 100644
--- a/net/mac80211/Kconfig
+++ b/net/mac80211/Kconfig
@@ -57,14 +57,13 @@ comment "Some wireless drivers require a rate control algorithm"
depends on MAC80211 && MAC80211_HAS_RC=n
config MAC80211_MESH
- bool "Enable mac80211 mesh networking (pre-802.11s) support"
+ bool "Enable mac80211 mesh networking support"
depends on MAC80211
---help---
- This options enables support of Draft 802.11s mesh networking.
- The implementation is based on Draft 2.08 of the Mesh Networking
- amendment. However, no compliance with that draft is claimed or even
- possible, as drafts leave a number of identifiers to be defined after
- ratification. For more information visit http://o11s.org/.
+ Select this option to enable 802.11 mesh operation in mac80211
+ drivers that support it. 802.11 mesh connects multiple stations
+ over (possibly multi-hop) wireless links to form a single logical
+ LAN.
config MAC80211_LEDS
bool "Enable LED triggers"
diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c
index 818aa0060349..de65fe3ed9cc 100644
--- a/net/mac80211/cfg.c
+++ b/net/mac80211/cfg.c
@@ -800,8 +800,8 @@ static int ieee80211_set_ftm_responder_params(
u8 *pos;
int len;
- if ((!lci || !lci_len) && (!civicloc || !civicloc_len))
- return 1;
+ if (!lci_len && !civicloc_len)
+ return 0;
bss_conf = &sdata->vif.bss_conf;
old = bss_conf->ftmr_params;
@@ -2028,6 +2028,9 @@ static int ieee80211_update_mesh_config(struct wiphy *wiphy,
nconf->dot11MeshAwakeWindowDuration;
if (_chg_mesh_attr(NL80211_MESHCONF_PLINK_TIMEOUT, mask))
conf->plink_timeout = nconf->plink_timeout;
+ if (_chg_mesh_attr(NL80211_MESHCONF_CONNECTED_TO_GATE, mask))
+ conf->dot11MeshConnectedToMeshGate =
+ nconf->dot11MeshConnectedToMeshGate;
ieee80211_mbss_info_change_notify(sdata, BSS_CHANGED_BEACON);
return 0;
}
@@ -3850,6 +3853,26 @@ ieee80211_get_ftm_responder_stats(struct wiphy *wiphy,
return drv_get_ftm_responder_stats(local, sdata, ftm_stats);
}
+static int
+ieee80211_start_pmsr(struct wiphy *wiphy, struct wireless_dev *dev,
+ struct cfg80211_pmsr_request *request)
+{
+ struct ieee80211_local *local = wiphy_priv(wiphy);
+ struct ieee80211_sub_if_data *sdata = IEEE80211_WDEV_TO_SUB_IF(dev);
+
+ return drv_start_pmsr(local, sdata, request);
+}
+
+static void
+ieee80211_abort_pmsr(struct wiphy *wiphy, struct wireless_dev *dev,
+ struct cfg80211_pmsr_request *request)
+{
+ struct ieee80211_local *local = wiphy_priv(wiphy);
+ struct ieee80211_sub_if_data *sdata = IEEE80211_WDEV_TO_SUB_IF(dev);
+
+ return drv_abort_pmsr(local, sdata, request);
+}
+
const struct cfg80211_ops mac80211_config_ops = {
.add_virtual_intf = ieee80211_add_iface,
.del_virtual_intf = ieee80211_del_iface,
@@ -3945,4 +3968,6 @@ const struct cfg80211_ops mac80211_config_ops = {
.tx_control_port = ieee80211_tx_control_port,
.get_txq_stats = ieee80211_get_txq_stats,
.get_ftm_responder_stats = ieee80211_get_ftm_responder_stats,
+ .start_pmsr = ieee80211_start_pmsr,
+ .abort_pmsr = ieee80211_abort_pmsr,
};
diff --git a/net/mac80211/debugfs_netdev.c b/net/mac80211/debugfs_netdev.c
index c813207bb123..cff0fb3578c9 100644
--- a/net/mac80211/debugfs_netdev.c
+++ b/net/mac80211/debugfs_netdev.c
@@ -641,6 +641,8 @@ IEEE80211_IF_FILE(dot11MeshHWMPconfirmationInterval,
IEEE80211_IF_FILE(power_mode, u.mesh.mshcfg.power_mode, DEC);
IEEE80211_IF_FILE(dot11MeshAwakeWindowDuration,
u.mesh.mshcfg.dot11MeshAwakeWindowDuration, DEC);
+IEEE80211_IF_FILE(dot11MeshConnectedToMeshGate,
+ u.mesh.mshcfg.dot11MeshConnectedToMeshGate, DEC);
#endif
#define DEBUGFS_ADD_MODE(name, mode) \
@@ -762,6 +764,7 @@ static void add_mesh_config(struct ieee80211_sub_if_data *sdata)
MESHPARAMS_ADD(dot11MeshHWMPconfirmationInterval);
MESHPARAMS_ADD(power_mode);
MESHPARAMS_ADD(dot11MeshAwakeWindowDuration);
+ MESHPARAMS_ADD(dot11MeshConnectedToMeshGate);
#undef MESHPARAMS_ADD
}
#endif
diff --git a/net/mac80211/debugfs_sta.c b/net/mac80211/debugfs_sta.c
index af5185a836e5..b753194710ad 100644
--- a/net/mac80211/debugfs_sta.c
+++ b/net/mac80211/debugfs_sta.c
@@ -795,22 +795,22 @@ static ssize_t sta_he_capa_read(struct file *file, char __user *userbuf,
#define PRINT_NSS_SUPP(f, n) \
do { \
- int i; \
+ int _i; \
u16 v = le16_to_cpu(nss->f); \
p += scnprintf(p, buf_sz + buf - p, n ": %#.4x\n", v); \
- for (i = 0; i < 8; i += 2) { \
- switch ((v >> i) & 0x3) { \
+ for (_i = 0; _i < 8; _i += 2) { \
+ switch ((v >> _i) & 0x3) { \
case 0: \
- PRINT(n "-%d-SUPPORT-0-7", i / 2); \
+ PRINT(n "-%d-SUPPORT-0-7", _i / 2); \
break; \
case 1: \
- PRINT(n "-%d-SUPPORT-0-9", i / 2); \
+ PRINT(n "-%d-SUPPORT-0-9", _i / 2); \
break; \
case 2: \
- PRINT(n "-%d-SUPPORT-0-11", i / 2); \
+ PRINT(n "-%d-SUPPORT-0-11", _i / 2); \
break; \
case 3: \
- PRINT(n "-%d-NOT-SUPPORTED", i / 2); \
+ PRINT(n "-%d-NOT-SUPPORTED", _i / 2); \
break; \
} \
} \
diff --git a/net/mac80211/driver-ops.h b/net/mac80211/driver-ops.h
index 0b1747a2313d..3e0d5922a440 100644
--- a/net/mac80211/driver-ops.h
+++ b/net/mac80211/driver-ops.h
@@ -1199,6 +1199,40 @@ drv_get_ftm_responder_stats(struct ieee80211_local *local,
return ret;
}
+static inline int drv_start_pmsr(struct ieee80211_local *local,
+ struct ieee80211_sub_if_data *sdata,
+ struct cfg80211_pmsr_request *request)
+{
+ int ret = -EOPNOTSUPP;
+
+ might_sleep();
+ if (!check_sdata_in_driver(sdata))
+ return -EIO;
+
+ trace_drv_start_pmsr(local, sdata);
+
+ if (local->ops->start_pmsr)
+ ret = local->ops->start_pmsr(&local->hw, &sdata->vif, request);
+ trace_drv_return_int(local, ret);
+
+ return ret;
+}
+
+static inline void drv_abort_pmsr(struct ieee80211_local *local,
+ struct ieee80211_sub_if_data *sdata,
+ struct cfg80211_pmsr_request *request)
+{
+ trace_drv_abort_pmsr(local, sdata);
+
+ might_sleep();
+ if (!check_sdata_in_driver(sdata))
+ return;
+
+ if (local->ops->abort_pmsr)
+ local->ops->abort_pmsr(&local->hw, &sdata->vif, request);
+ trace_drv_return_void(local);
+}
+
static inline int drv_start_nan(struct ieee80211_local *local,
struct ieee80211_sub_if_data *sdata,
struct cfg80211_nan_conf *conf)
diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h
index 10a05062e4a0..7dfb4e2f98b2 100644
--- a/net/mac80211/ieee80211_i.h
+++ b/net/mac80211/ieee80211_i.h
@@ -500,6 +500,7 @@ struct ieee80211_if_managed {
unsigned int uapsd_max_sp_len;
int wmm_last_param_set;
+ int mu_edca_last_param_set;
u8 use_4addr;
diff --git a/net/mac80211/iface.c b/net/mac80211/iface.c
index 3a0171a65db3..4a6ff1482a9f 100644
--- a/net/mac80211/iface.c
+++ b/net/mac80211/iface.c
@@ -1802,7 +1802,7 @@ int ieee80211_if_add(struct ieee80211_local *local, const char *name,
}
ieee80211_assign_perm_addr(local, ndev->perm_addr, type);
- if (params && is_valid_ether_addr(params->macaddr))
+ if (is_valid_ether_addr(params->macaddr))
memcpy(ndev->dev_addr, params->macaddr, ETH_ALEN);
else
memcpy(ndev->dev_addr, ndev->perm_addr, ETH_ALEN);
@@ -1871,11 +1871,9 @@ int ieee80211_if_add(struct ieee80211_local *local, const char *name,
ieee80211_setup_sdata(sdata, type);
if (ndev) {
- if (params) {
- ndev->ieee80211_ptr->use_4addr = params->use_4addr;
- if (type == NL80211_IFTYPE_STATION)
- sdata->u.mgd.use_4addr = params->use_4addr;
- }
+ ndev->ieee80211_ptr->use_4addr = params->use_4addr;
+ if (type == NL80211_IFTYPE_STATION)
+ sdata->u.mgd.use_4addr = params->use_4addr;
ndev->features |= local->hw.netdev_features;
diff --git a/net/mac80211/main.c b/net/mac80211/main.c
index 7b8320d4a8e4..87a729926734 100644
--- a/net/mac80211/main.c
+++ b/net/mac80211/main.c
@@ -1221,8 +1221,10 @@ int ieee80211_register_hw(struct ieee80211_hw *hw)
/* add one default STA interface if supported */
if (local->hw.wiphy->interface_modes & BIT(NL80211_IFTYPE_STATION) &&
!ieee80211_hw_check(hw, NO_AUTO_VIF)) {
+ struct vif_params params = {0};
+
result = ieee80211_if_add(local, "wlan%d", NET_NAME_ENUM, NULL,
- NL80211_IFTYPE_STATION, NULL);
+ NL80211_IFTYPE_STATION, &params);
if (result)
wiphy_warn(local->hw.wiphy,
"Failed to add default virtual iface\n");
diff --git a/net/mac80211/mesh.c b/net/mac80211/mesh.c
index 8bad414c52ad..c90452aa0c42 100644
--- a/net/mac80211/mesh.c
+++ b/net/mac80211/mesh.c
@@ -254,6 +254,9 @@ int mesh_add_meshconf_ie(struct ieee80211_sub_if_data *sdata,
struct ieee80211_if_mesh *ifmsh = &sdata->u.mesh;
u8 *pos, neighbors;
u8 meshconf_len = sizeof(struct ieee80211_meshconf_ie);
+ bool is_connected_to_gate = ifmsh->num_gates > 0 ||
+ ifmsh->mshcfg.dot11MeshGateAnnouncementProtocol ||
+ ifmsh->mshcfg.dot11MeshConnectedToMeshGate;
if (skb_tailroom(skb) < 2 + meshconf_len)
return -ENOMEM;
@@ -278,7 +281,7 @@ int mesh_add_meshconf_ie(struct ieee80211_sub_if_data *sdata,
/* Mesh Formation Info - number of neighbors */
neighbors = atomic_read(&ifmsh->estab_plinks);
neighbors = min_t(int, neighbors, IEEE80211_MAX_MESH_PEERINGS);
- *pos++ = neighbors << 1;
+ *pos++ = (neighbors << 1) | is_connected_to_gate;
/* Mesh capability */
*pos = 0x00;
*pos |= ifmsh->mshcfg.dot11MeshForwarding ?
@@ -1191,7 +1194,8 @@ static void ieee80211_mesh_rx_bcn_presp(struct ieee80211_sub_if_data *sdata,
if (!sdata->u.mesh.user_mpm ||
sdata->u.mesh.mshcfg.rssi_threshold == 0 ||
sdata->u.mesh.mshcfg.rssi_threshold < rx_status->signal)
- mesh_neighbour_update(sdata, mgmt->sa, &elems);
+ mesh_neighbour_update(sdata, mgmt->sa, &elems,
+ rx_status);
}
if (ifmsh->sync_ops)
diff --git a/net/mac80211/mesh.h b/net/mac80211/mesh.h
index 21526630bf65..cad6592c52a1 100644
--- a/net/mac80211/mesh.h
+++ b/net/mac80211/mesh.h
@@ -273,7 +273,8 @@ int mesh_gate_num(struct ieee80211_sub_if_data *sdata);
/* Mesh plinks */
void mesh_neighbour_update(struct ieee80211_sub_if_data *sdata,
- u8 *hw_addr, struct ieee802_11_elems *ie);
+ u8 *hw_addr, struct ieee802_11_elems *ie,
+ struct ieee80211_rx_status *rx_status);
bool mesh_peer_accepts_plinks(struct ieee802_11_elems *ie);
u32 mesh_accept_plinks_update(struct ieee80211_sub_if_data *sdata);
void mesh_plink_timer(struct timer_list *t);
diff --git a/net/mac80211/mesh_plink.c b/net/mac80211/mesh_plink.c
index 5b5b0f95ffd1..33055c8ed37e 100644
--- a/net/mac80211/mesh_plink.c
+++ b/net/mac80211/mesh_plink.c
@@ -513,7 +513,8 @@ __mesh_sta_info_alloc(struct ieee80211_sub_if_data *sdata, u8 *hw_addr)
static struct sta_info *
mesh_sta_info_alloc(struct ieee80211_sub_if_data *sdata, u8 *addr,
- struct ieee802_11_elems *elems)
+ struct ieee802_11_elems *elems,
+ struct ieee80211_rx_status *rx_status)
{
struct sta_info *sta = NULL;
@@ -521,11 +522,17 @@ mesh_sta_info_alloc(struct ieee80211_sub_if_data *sdata, u8 *addr,
if (sdata->u.mesh.user_mpm ||
sdata->u.mesh.security & IEEE80211_MESH_SEC_AUTHED) {
if (mesh_peer_accepts_plinks(elems) &&
- mesh_plink_availables(sdata))
+ mesh_plink_availables(sdata)) {
+ int sig = 0;
+
+ if (ieee80211_hw_check(&sdata->local->hw, SIGNAL_DBM))
+ sig = rx_status->signal;
+
cfg80211_notify_new_peer_candidate(sdata->dev, addr,
elems->ie_start,
elems->total_len,
- GFP_KERNEL);
+ sig, GFP_KERNEL);
+ }
} else
sta = __mesh_sta_info_alloc(sdata, addr);
@@ -538,13 +545,15 @@ mesh_sta_info_alloc(struct ieee80211_sub_if_data *sdata, u8 *addr,
* @sdata: local meshif
* @addr: peer's address
* @elems: IEs from beacon or mesh peering frame.
+ * @rx_status: rx status for the frame for signal reporting
*
* Return existing or newly allocated sta_info under RCU read lock.
* (re)initialize with given IEs.
*/
static struct sta_info *
mesh_sta_info_get(struct ieee80211_sub_if_data *sdata,
- u8 *addr, struct ieee802_11_elems *elems) __acquires(RCU)
+ u8 *addr, struct ieee802_11_elems *elems,
+ struct ieee80211_rx_status *rx_status) __acquires(RCU)
{
struct sta_info *sta = NULL;
@@ -555,7 +564,7 @@ mesh_sta_info_get(struct ieee80211_sub_if_data *sdata,
} else {
rcu_read_unlock();
/* can't run atomic */
- sta = mesh_sta_info_alloc(sdata, addr, elems);
+ sta = mesh_sta_info_alloc(sdata, addr, elems, rx_status);
if (!sta) {
rcu_read_lock();
return NULL;
@@ -576,20 +585,25 @@ mesh_sta_info_get(struct ieee80211_sub_if_data *sdata,
* @sdata: local meshif
* @addr: peer's address
* @elems: IEs from beacon or mesh peering frame
+ * @rx_status: rx status for the frame for signal reporting
*
* Initiates peering if appropriate.
*/
void mesh_neighbour_update(struct ieee80211_sub_if_data *sdata,
u8 *hw_addr,
- struct ieee802_11_elems *elems)
+ struct ieee802_11_elems *elems,
+ struct ieee80211_rx_status *rx_status)
{
struct sta_info *sta;
u32 changed = 0;
- sta = mesh_sta_info_get(sdata, hw_addr, elems);
+ sta = mesh_sta_info_get(sdata, hw_addr, elems, rx_status);
if (!sta)
goto out;
+ sta->mesh->connected_to_gate = elems->mesh_config->meshconf_form &
+ IEEE80211_MESHCONF_FORM_CONNECTED_TO_GATE;
+
if (mesh_peer_accepts_plinks(elems) &&
sta->mesh->plink_state == NL80211_PLINK_LISTEN &&
sdata->u.mesh.accepting_plinks &&
@@ -1069,7 +1083,8 @@ out:
static void
mesh_process_plink_frame(struct ieee80211_sub_if_data *sdata,
struct ieee80211_mgmt *mgmt,
- struct ieee802_11_elems *elems)
+ struct ieee802_11_elems *elems,
+ struct ieee80211_rx_status *rx_status)
{
struct sta_info *sta;
@@ -1134,7 +1149,7 @@ mesh_process_plink_frame(struct ieee80211_sub_if_data *sdata,
if (event == OPN_ACPT) {
rcu_read_unlock();
/* allocate sta entry if necessary and update info */
- sta = mesh_sta_info_get(sdata, mgmt->sa, elems);
+ sta = mesh_sta_info_get(sdata, mgmt->sa, elems, rx_status);
if (!sta) {
mpl_dbg(sdata, "Mesh plink: failed to init peer!\n");
goto unlock_rcu;
@@ -1200,5 +1215,5 @@ void mesh_rx_plink_frame(struct ieee80211_sub_if_data *sdata,
return;
}
ieee802_11_parse_elems(baseaddr, len - baselen, true, &elems);
- mesh_process_plink_frame(sdata, mgmt, &elems);
+ mesh_process_plink_frame(sdata, mgmt, &elems, rx_status);
}
diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c
index bcf5ffc1567a..687821567287 100644
--- a/net/mac80211/mlme.c
+++ b/net/mac80211/mlme.c
@@ -916,6 +916,15 @@ static void ieee80211_send_assoc(struct ieee80211_sub_if_data *sdata)
ieee80211_add_vht_ie(sdata, skb, sband,
&assoc_data->ap_vht_cap);
+ /*
+ * If AP doesn't support HT, mark HE as disabled.
+ * If on the 5GHz band, make sure it supports VHT.
+ */
+ if (ifmgd->flags & IEEE80211_STA_DISABLE_HT ||
+ (sband->band == NL80211_BAND_5GHZ &&
+ ifmgd->flags & IEEE80211_STA_DISABLE_VHT))
+ ifmgd->flags |= IEEE80211_STA_DISABLE_HE;
+
if (!(ifmgd->flags & IEEE80211_STA_DISABLE_HE))
ieee80211_add_he_ie(sdata, skb, sband);
@@ -1869,7 +1878,7 @@ ieee80211_sta_wmm_params(struct ieee80211_local *local,
struct ieee80211_tx_queue_params params[IEEE80211_NUM_ACS];
struct ieee80211_if_managed *ifmgd = &sdata->u.mgd;
size_t left;
- int count, ac;
+ int count, mu_edca_count, ac;
const u8 *pos;
u8 uapsd_queues = 0;
@@ -1889,9 +1898,16 @@ ieee80211_sta_wmm_params(struct ieee80211_local *local,
uapsd_queues = ifmgd->uapsd_queues;
count = wmm_param[6] & 0x0f;
- if (count == ifmgd->wmm_last_param_set)
+ /* -1 is the initial value of ifmgd->mu_edca_last_param_set.
+ * if mu_edca was preset before and now it disappeared tell
+ * the driver about it.
+ */
+ mu_edca_count = mu_edca ? mu_edca->mu_qos_info & 0x0f : -1;
+ if (count == ifmgd->wmm_last_param_set &&
+ mu_edca_count == ifmgd->mu_edca_last_param_set)
return false;
ifmgd->wmm_last_param_set = count;
+ ifmgd->mu_edca_last_param_set = mu_edca_count;
pos = wmm_param + 8;
left = wmm_param_len - 8;
@@ -3062,6 +3078,19 @@ static void ieee80211_get_rates(struct ieee80211_supported_band *sband,
}
}
+static bool ieee80211_twt_req_supported(const struct sta_info *sta,
+ const struct ieee802_11_elems *elems)
+{
+ if (elems->ext_capab_len < 10)
+ return false;
+
+ if (!(elems->ext_capab[9] & WLAN_EXT_CAPA10_TWT_RESPONDER_SUPPORT))
+ return false;
+
+ return sta->sta.he_cap.he_cap_elem.mac_cap_info[0] &
+ IEEE80211_HE_MAC_CAP0_TWT_RES;
+}
+
static bool ieee80211_assoc_success(struct ieee80211_sub_if_data *sdata,
struct cfg80211_bss *cbss,
struct ieee80211_mgmt *mgmt, size_t len)
@@ -3215,16 +3244,6 @@ static bool ieee80211_assoc_success(struct ieee80211_sub_if_data *sdata,
goto out;
}
- /*
- * If AP doesn't support HT, or it doesn't have HE mandatory IEs, mark
- * HE as disabled. If on the 5GHz band, make sure it supports VHT.
- */
- if (ifmgd->flags & IEEE80211_STA_DISABLE_HT ||
- (sband->band == NL80211_BAND_5GHZ &&
- ifmgd->flags & IEEE80211_STA_DISABLE_VHT) ||
- (!elems.he_cap && !elems.he_operation))
- ifmgd->flags |= IEEE80211_STA_DISABLE_HE;
-
if (!(ifmgd->flags & IEEE80211_STA_DISABLE_HE) &&
(!elems.he_cap || !elems.he_operation)) {
mutex_unlock(&sdata->local->sta_mtx);
@@ -3251,8 +3270,11 @@ static bool ieee80211_assoc_success(struct ieee80211_sub_if_data *sdata,
sta);
bss_conf->he_support = sta->sta.he_cap.has_he;
+ bss_conf->twt_requester =
+ ieee80211_twt_req_supported(sta, &elems);
} else {
bss_conf->he_support = false;
+ bss_conf->twt_requester = false;
}
if (bss_conf->he_support) {
@@ -3337,6 +3359,7 @@ static bool ieee80211_assoc_success(struct ieee80211_sub_if_data *sdata,
* 4-bit value.
*/
ifmgd->wmm_last_param_set = -1;
+ ifmgd->mu_edca_last_param_set = -1;
if (ifmgd->flags & IEEE80211_STA_DISABLE_WMM) {
ieee80211_set_wmm_default(sdata, false, false);
@@ -4660,8 +4683,10 @@ static int ieee80211_prep_channel(struct ieee80211_sub_if_data *sdata,
}
}
- if (!(ifmgd->flags & IEEE80211_STA_DISABLE_HE) &&
- ieee80211_get_he_sta_cap(sband)) {
+ if (!ieee80211_get_he_sta_cap(sband))
+ ifmgd->flags |= IEEE80211_STA_DISABLE_HE;
+
+ if (!(ifmgd->flags & IEEE80211_STA_DISABLE_HE)) {
const struct cfg80211_bss_ies *ies;
const u8 *he_oper_ie;
diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c
index 428f7ad5f9b5..45aad3d3108c 100644
--- a/net/mac80211/rx.c
+++ b/net/mac80211/rx.c
@@ -143,6 +143,9 @@ ieee80211_rx_radiotap_hdrlen(struct ieee80211_local *local,
/* allocate extra bitmaps */
if (status->chains)
len += 4 * hweight8(status->chains);
+ /* vendor presence bitmap */
+ if (status->flag & RX_FLAG_RADIOTAP_VENDOR_DATA)
+ len += 4;
if (ieee80211_have_rx_timestamp(status)) {
len = ALIGN(len, 8);
@@ -207,8 +210,6 @@ ieee80211_rx_radiotap_hdrlen(struct ieee80211_local *local,
if (status->flag & RX_FLAG_RADIOTAP_VENDOR_DATA) {
struct ieee80211_vendor_radiotap *rtap = (void *)skb->data;
- /* vendor presence bitmap */
- len += 4;
/* alignment for fixed 6-byte vendor data header */
len = ALIGN(len, 2);
/* vendor data header */
@@ -753,6 +754,7 @@ ieee80211_rx_monitor(struct ieee80211_local *local, struct sk_buff *origskb,
struct ieee80211_sub_if_data *monitor_sdata =
rcu_dereference(local->monitor_sdata);
bool only_monitor = false;
+ unsigned int min_head_len;
if (status->flag & RX_FLAG_RADIOTAP_HE)
rtap_space += sizeof(struct ieee80211_radiotap_he);
@@ -760,12 +762,18 @@ ieee80211_rx_monitor(struct ieee80211_local *local, struct sk_buff *origskb,
if (status->flag & RX_FLAG_RADIOTAP_HE_MU)
rtap_space += sizeof(struct ieee80211_radiotap_he_mu);
+ if (status->flag & RX_FLAG_RADIOTAP_LSIG)
+ rtap_space += sizeof(struct ieee80211_radiotap_lsig);
+
if (unlikely(status->flag & RX_FLAG_RADIOTAP_VENDOR_DATA)) {
- struct ieee80211_vendor_radiotap *rtap = (void *)origskb->data;
+ struct ieee80211_vendor_radiotap *rtap =
+ (void *)(origskb->data + rtap_space);
rtap_space += sizeof(*rtap) + rtap->len + rtap->pad;
}
+ min_head_len = rtap_space;
+
/*
* First, we may need to make a copy of the skb because
* (1) we need to modify it for radiotap (if not present), and
@@ -775,18 +783,23 @@ ieee80211_rx_monitor(struct ieee80211_local *local, struct sk_buff *origskb,
* the SKB because it has a bad FCS/PLCP checksum.
*/
- if (ieee80211_hw_check(&local->hw, RX_INCLUDES_FCS)) {
- if (unlikely(origskb->len <= FCS_LEN)) {
- /* driver bug */
- WARN_ON(1);
- dev_kfree_skb(origskb);
- return NULL;
+ if (!(status->flag & RX_FLAG_NO_PSDU)) {
+ if (ieee80211_hw_check(&local->hw, RX_INCLUDES_FCS)) {
+ if (unlikely(origskb->len <= FCS_LEN + rtap_space)) {
+ /* driver bug */
+ WARN_ON(1);
+ dev_kfree_skb(origskb);
+ return NULL;
+ }
+ present_fcs_len = FCS_LEN;
}
- present_fcs_len = FCS_LEN;
+
+ /* also consider the hdr->frame_control */
+ min_head_len += 2;
}
- /* ensure hdr->frame_control and vendor radiotap data are in skb head */
- if (!pskb_may_pull(origskb, 2 + rtap_space)) {
+ /* ensure that the expected data elements are in skb head */
+ if (!pskb_may_pull(origskb, min_head_len)) {
dev_kfree_skb(origskb);
return NULL;
}
diff --git a/net/mac80211/scan.c b/net/mac80211/scan.c
index 5d2a11777718..95413413f98c 100644
--- a/net/mac80211/scan.c
+++ b/net/mac80211/scan.c
@@ -356,7 +356,7 @@ static bool ieee80211_prep_hw_scan(struct ieee80211_local *local)
static void __ieee80211_scan_completed(struct ieee80211_hw *hw, bool aborted)
{
struct ieee80211_local *local = hw_to_local(hw);
- bool hw_scan = local->ops->hw_scan;
+ bool hw_scan = test_bit(SCAN_HW_SCANNING, &local->scanning);
bool was_scanning = local->scanning;
struct cfg80211_scan_request *scan_req;
struct ieee80211_sub_if_data *scan_sdata;
@@ -606,6 +606,7 @@ static int __ieee80211_start_scan(struct ieee80211_sub_if_data *sdata,
struct cfg80211_scan_request *req)
{
struct ieee80211_local *local = sdata->local;
+ bool hw_scan = local->ops->hw_scan;
int rc;
lockdep_assert_held(&local->mtx);
@@ -620,7 +621,8 @@ static int __ieee80211_start_scan(struct ieee80211_sub_if_data *sdata,
return 0;
}
- if (local->ops->hw_scan) {
+ again:
+ if (hw_scan) {
u8 *ies;
local->hw_scan_ies_bufsize = local->scan_ies_len + req->ie_len;
@@ -679,7 +681,7 @@ static int __ieee80211_start_scan(struct ieee80211_sub_if_data *sdata,
else
memcpy(local->scan_addr, sdata->vif.addr, ETH_ALEN);
- if (local->ops->hw_scan) {
+ if (hw_scan) {
__set_bit(SCAN_HW_SCANNING, &local->scanning);
} else if ((req->n_channels == 1) &&
(req->channels[0] == local->_oper_chandef.chan)) {
@@ -722,7 +724,7 @@ static int __ieee80211_start_scan(struct ieee80211_sub_if_data *sdata,
ieee80211_recalc_idle(local);
- if (local->ops->hw_scan) {
+ if (hw_scan) {
WARN_ON(!ieee80211_prep_hw_scan(local));
rc = drv_hw_scan(local, sdata, local->hw_scan_req);
} else {
@@ -740,6 +742,18 @@ static int __ieee80211_start_scan(struct ieee80211_sub_if_data *sdata,
RCU_INIT_POINTER(local->scan_sdata, NULL);
}
+ if (hw_scan && rc == 1) {
+ /*
+ * we can't fall back to software for P2P-GO
+ * as it must update NoA etc.
+ */
+ if (ieee80211_vif_type_p2p(&sdata->vif) ==
+ NL80211_IFTYPE_P2P_GO)
+ return -EOPNOTSUPP;
+ hw_scan = false;
+ goto again;
+ }
+
return rc;
}
diff --git a/net/mac80211/sta_info.c b/net/mac80211/sta_info.c
index fb8c2252ac0e..c4a8f115ed33 100644
--- a/net/mac80211/sta_info.c
+++ b/net/mac80211/sta_info.c
@@ -2253,11 +2253,8 @@ void sta_set_sinfo(struct sta_info *sta, struct station_info *sinfo,
}
if (tidstats && !cfg80211_sinfo_alloc_tid_stats(sinfo, GFP_KERNEL)) {
- for (i = 0; i < IEEE80211_NUM_TIDS + 1; i++) {
- struct cfg80211_tid_stats *tidstats = &sinfo->pertid[i];
-
- sta_set_tidstats(sta, tidstats, i);
- }
+ for (i = 0; i < IEEE80211_NUM_TIDS + 1; i++)
+ sta_set_tidstats(sta, &sinfo->pertid[i], i);
}
if (ieee80211_vif_is_mesh(&sdata->vif)) {
@@ -2267,7 +2264,8 @@ void sta_set_sinfo(struct sta_info *sta, struct station_info *sinfo,
BIT_ULL(NL80211_STA_INFO_PLINK_STATE) |
BIT_ULL(NL80211_STA_INFO_LOCAL_PM) |
BIT_ULL(NL80211_STA_INFO_PEER_PM) |
- BIT_ULL(NL80211_STA_INFO_NONPEER_PM);
+ BIT_ULL(NL80211_STA_INFO_NONPEER_PM) |
+ BIT_ULL(NL80211_STA_INFO_CONNECTED_TO_GATE);
sinfo->llid = sta->mesh->llid;
sinfo->plid = sta->mesh->plid;
@@ -2279,6 +2277,7 @@ void sta_set_sinfo(struct sta_info *sta, struct station_info *sinfo,
sinfo->local_pm = sta->mesh->local_pm;
sinfo->peer_pm = sta->mesh->peer_pm;
sinfo->nonpeer_pm = sta->mesh->nonpeer_pm;
+ sinfo->connected_to_gate = sta->mesh->connected_to_gate;
#endif
}
diff --git a/net/mac80211/sta_info.h b/net/mac80211/sta_info.h
index 9a04327d71d1..8eb29041be54 100644
--- a/net/mac80211/sta_info.h
+++ b/net/mac80211/sta_info.h
@@ -364,6 +364,7 @@ DECLARE_EWMA(mesh_fail_avg, 20, 8)
* @nonpeer_pm: STA power save mode towards non-peer neighbors
* @processed_beacon: set to true after peer rates and capabilities are
* processed
+ * @connected_to_gate: true if mesh STA has a path to a mesh gate
* @fail_avg: moving percentage of failed MSDUs
*/
struct mesh_sta {
@@ -381,6 +382,7 @@ struct mesh_sta {
u8 plink_retries;
bool processed_beacon;
+ bool connected_to_gate;
enum nl80211_plink_state plink_state;
u32 plink_timeout;
diff --git a/net/mac80211/trace.h b/net/mac80211/trace.h
index 588c51a67c89..35ea0dcb55e6 100644
--- a/net/mac80211/trace.h
+++ b/net/mac80211/trace.h
@@ -1052,10 +1052,10 @@ TRACE_EVENT(drv_ampdu_action,
);
TRACE_EVENT(drv_get_survey,
- TP_PROTO(struct ieee80211_local *local, int idx,
+ TP_PROTO(struct ieee80211_local *local, int _idx,
struct survey_info *survey),
- TP_ARGS(local, idx, survey),
+ TP_ARGS(local, _idx, survey),
TP_STRUCT__entry(
LOCAL_ENTRY
@@ -1064,7 +1064,7 @@ TRACE_EVENT(drv_get_survey,
TP_fast_assign(
LOCAL_ASSIGN;
- __entry->idx = idx;
+ __entry->idx = _idx;
),
TP_printk(
@@ -1882,6 +1882,18 @@ TRACE_EVENT(drv_del_nan_func,
)
);
+DEFINE_EVENT(local_sdata_evt, drv_start_pmsr,
+ TP_PROTO(struct ieee80211_local *local,
+ struct ieee80211_sub_if_data *sdata),
+ TP_ARGS(local, sdata)
+);
+
+DEFINE_EVENT(local_sdata_evt, drv_abort_pmsr,
+ TP_PROTO(struct ieee80211_local *local,
+ struct ieee80211_sub_if_data *sdata),
+ TP_ARGS(local, sdata)
+);
+
/*
* Tracing for API calls that drivers call.
*/
diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c
index 1f536ba573b4..f170d6c6629a 100644
--- a/net/mac80211/tx.c
+++ b/net/mac80211/tx.c
@@ -3218,6 +3218,9 @@ static bool ieee80211_amsdu_aggregate(struct ieee80211_sub_if_data *sdata,
if (!ieee80211_hw_check(&local->hw, TX_AMSDU))
return false;
+ if (skb_is_gso(skb))
+ return false;
+
if (!txq)
return false;
@@ -3242,7 +3245,7 @@ static bool ieee80211_amsdu_aggregate(struct ieee80211_sub_if_data *sdata,
tin = &txqi->tin;
flow = fq_flow_classify(fq, tin, skb, fq_flow_get_default_func);
head = skb_peek_tail(&flow->queue);
- if (!head)
+ if (!head || skb_is_gso(head))
goto out;
orig_len = head->len;
@@ -3583,7 +3586,7 @@ begin:
skb_queue_splice_tail(&tx.skbs, &txqi->frags);
}
- if (skb && skb_has_frag_list(skb) &&
+ if (skb_has_frag_list(skb) &&
!ieee80211_hw_check(&local->hw, TX_FRAG_LIST)) {
if (skb_linearize(skb)) {
ieee80211_free_txskb(&local->hw, skb);
@@ -4579,7 +4582,7 @@ struct sk_buff *ieee80211_nullfunc_get(struct ieee80211_hw *hw,
IEEE80211_STYPE_NULLFUNC |
IEEE80211_FCTL_TODS);
if (qos) {
- __le16 qos = cpu_to_le16(7);
+ __le16 qoshdr = cpu_to_le16(7);
BUILD_BUG_ON((IEEE80211_STYPE_QOS_NULLFUNC |
IEEE80211_STYPE_NULLFUNC) !=
@@ -4588,7 +4591,7 @@ struct sk_buff *ieee80211_nullfunc_get(struct ieee80211_hw *hw,
cpu_to_le16(IEEE80211_STYPE_QOS_NULLFUNC);
skb->priority = 7;
skb_set_queue_mapping(skb, IEEE80211_AC_VO);
- skb_put_data(skb, &qos, sizeof(qos));
+ skb_put_data(skb, &qoshdr, sizeof(qoshdr));
}
memcpy(nullfunc->addr1, ifmgd->bssid, ETH_ALEN);
diff --git a/net/mac80211/util.c b/net/mac80211/util.c
index bec424316ea4..d0eb38b890aa 100644
--- a/net/mac80211/util.c
+++ b/net/mac80211/util.c
@@ -299,16 +299,16 @@ out:
spin_unlock_bh(&fq->lock);
}
-void ieee80211_wake_txqs(unsigned long data)
+static void
+__releases(&local->queue_stop_reason_lock)
+__acquires(&local->queue_stop_reason_lock)
+_ieee80211_wake_txqs(struct ieee80211_local *local, unsigned long *flags)
{
- struct ieee80211_local *local = (struct ieee80211_local *)data;
struct ieee80211_sub_if_data *sdata;
int n_acs = IEEE80211_NUM_ACS;
- unsigned long flags;
int i;
rcu_read_lock();
- spin_lock_irqsave(&local->queue_stop_reason_lock, flags);
if (local->hw.queues < IEEE80211_NUM_ACS)
n_acs = 1;
@@ -317,7 +317,7 @@ void ieee80211_wake_txqs(unsigned long data)
if (local->queue_stop_reasons[i])
continue;
- spin_unlock_irqrestore(&local->queue_stop_reason_lock, flags);
+ spin_unlock_irqrestore(&local->queue_stop_reason_lock, *flags);
list_for_each_entry_rcu(sdata, &local->interfaces, list) {
int ac;
@@ -329,13 +329,22 @@ void ieee80211_wake_txqs(unsigned long data)
__ieee80211_wake_txqs(sdata, ac);
}
}
- spin_lock_irqsave(&local->queue_stop_reason_lock, flags);
+ spin_lock_irqsave(&local->queue_stop_reason_lock, *flags);
}
- spin_unlock_irqrestore(&local->queue_stop_reason_lock, flags);
rcu_read_unlock();
}
+void ieee80211_wake_txqs(unsigned long data)
+{
+ struct ieee80211_local *local = (struct ieee80211_local *)data;
+ unsigned long flags;
+
+ spin_lock_irqsave(&local->queue_stop_reason_lock, flags);
+ _ieee80211_wake_txqs(local, &flags);
+ spin_unlock_irqrestore(&local->queue_stop_reason_lock, flags);
+}
+
void ieee80211_propagate_queue_wake(struct ieee80211_local *local, int queue)
{
struct ieee80211_sub_if_data *sdata;
@@ -371,7 +380,8 @@ void ieee80211_propagate_queue_wake(struct ieee80211_local *local, int queue)
static void __ieee80211_wake_queue(struct ieee80211_hw *hw, int queue,
enum queue_stop_reason reason,
- bool refcounted)
+ bool refcounted,
+ unsigned long *flags)
{
struct ieee80211_local *local = hw_to_local(hw);
@@ -405,8 +415,19 @@ static void __ieee80211_wake_queue(struct ieee80211_hw *hw, int queue,
} else
tasklet_schedule(&local->tx_pending_tasklet);
- if (local->ops->wake_tx_queue)
- tasklet_schedule(&local->wake_txqs_tasklet);
+ /*
+ * Calling _ieee80211_wake_txqs here can be a problem because it may
+ * release queue_stop_reason_lock which has been taken by
+ * __ieee80211_wake_queue's caller. It is certainly not very nice to
+ * release someone's lock, but it is fine because all the callers of
+ * __ieee80211_wake_queue call it right before releasing the lock.
+ */
+ if (local->ops->wake_tx_queue) {
+ if (reason == IEEE80211_QUEUE_STOP_REASON_DRIVER)
+ tasklet_schedule(&local->wake_txqs_tasklet);
+ else
+ _ieee80211_wake_txqs(local, flags);
+ }
}
void ieee80211_wake_queue_by_reason(struct ieee80211_hw *hw, int queue,
@@ -417,7 +438,7 @@ void ieee80211_wake_queue_by_reason(struct ieee80211_hw *hw, int queue,
unsigned long flags;
spin_lock_irqsave(&local->queue_stop_reason_lock, flags);
- __ieee80211_wake_queue(hw, queue, reason, refcounted);
+ __ieee80211_wake_queue(hw, queue, reason, refcounted, &flags);
spin_unlock_irqrestore(&local->queue_stop_reason_lock, flags);
}
@@ -514,7 +535,7 @@ void ieee80211_add_pending_skb(struct ieee80211_local *local,
false);
__skb_queue_tail(&local->pending[queue], skb);
__ieee80211_wake_queue(hw, queue, IEEE80211_QUEUE_STOP_REASON_SKB_ADD,
- false);
+ false, &flags);
spin_unlock_irqrestore(&local->queue_stop_reason_lock, flags);
}
@@ -547,7 +568,7 @@ void ieee80211_add_pending_skbs(struct ieee80211_local *local,
for (i = 0; i < hw->queues; i++)
__ieee80211_wake_queue(hw, i,
IEEE80211_QUEUE_STOP_REASON_SKB_ADD,
- false);
+ false, &flags);
spin_unlock_irqrestore(&local->queue_stop_reason_lock, flags);
}
@@ -605,7 +626,7 @@ void ieee80211_wake_queues_by_reason(struct ieee80211_hw *hw,
spin_lock_irqsave(&local->queue_stop_reason_lock, flags);
for_each_set_bit(i, &queues, hw->queues)
- __ieee80211_wake_queue(hw, i, reason, refcounted);
+ __ieee80211_wake_queue(hw, i, reason, refcounted, &flags);
spin_unlock_irqrestore(&local->queue_stop_reason_lock, flags);
}
@@ -1202,6 +1223,8 @@ u32 ieee802_11_parse_elems_crc(const u8 *start, size_t len, bool action,
if (pos[0] == WLAN_EID_EXT_HE_MU_EDCA &&
elen >= (sizeof(*elems->mu_edca_param_set) + 1)) {
elems->mu_edca_param_set = (void *)&pos[1];
+ if (calc_crc)
+ crc = crc32_be(crc, pos - 2, elen + 2);
} else if (pos[0] == WLAN_EID_EXT_HE_CAPABILITY) {
elems->he_cap = (void *)&pos[1];
elems->he_cap_len = elen - 1;
diff --git a/net/ncsi/internal.h b/net/ncsi/internal.h
index 1dae77c54009..87505600dbb2 100644
--- a/net/ncsi/internal.h
+++ b/net/ncsi/internal.h
@@ -73,10 +73,15 @@ enum {
#define NCSI_OEM_MFR_BCM_ID 0x113d
/* Broadcom specific OEM Command */
#define NCSI_OEM_BCM_CMD_GMA 0x01 /* CMD ID for Get MAC */
+/* Mellanox specific OEM Command */
+#define NCSI_OEM_MLX_CMD_GMA 0x00 /* CMD ID for Get MAC */
+#define NCSI_OEM_MLX_CMD_GMA_PARAM 0x1b /* Parameter for GMA */
/* OEM Command payload lengths*/
#define NCSI_OEM_BCM_CMD_GMA_LEN 12
+#define NCSI_OEM_MLX_CMD_GMA_LEN 8
/* Mac address offset in OEM response */
#define BCM_MAC_ADDR_OFFSET 28
+#define MLX_MAC_ADDR_OFFSET 8
struct ncsi_channel_version {
@@ -222,6 +227,10 @@ struct ncsi_package {
unsigned int channel_num; /* Number of channels */
struct list_head channels; /* List of chanels */
struct list_head node; /* Form list of packages */
+
+ bool multi_channel; /* Enable multiple channels */
+ u32 channel_whitelist; /* Channels to configure */
+ struct ncsi_channel *preferred_channel; /* Primary channel */
};
struct ncsi_request {
@@ -287,16 +296,16 @@ struct ncsi_dev_priv {
#define NCSI_DEV_PROBED 1 /* Finalized NCSI topology */
#define NCSI_DEV_HWA 2 /* Enabled HW arbitration */
#define NCSI_DEV_RESHUFFLE 4
+#define NCSI_DEV_RESET 8 /* Reset state of NC */
unsigned int gma_flag; /* OEM GMA flag */
spinlock_t lock; /* Protect the NCSI device */
#if IS_ENABLED(CONFIG_IPV6)
unsigned int inet6_addr_num; /* Number of IPv6 addresses */
#endif
+ unsigned int package_probe_id;/* Current ID during probe */
unsigned int package_num; /* Number of packages */
struct list_head packages; /* List of packages */
struct ncsi_channel *hot_channel; /* Channel was ever active */
- struct ncsi_package *force_package; /* Force a specific package */
- struct ncsi_channel *force_channel; /* Force a specific channel */
struct ncsi_request requests[256]; /* Request table */
unsigned int request_id; /* Last used request ID */
#define NCSI_REQ_START_IDX 1
@@ -309,6 +318,9 @@ struct ncsi_dev_priv {
struct list_head node; /* Form NCSI device list */
#define NCSI_MAX_VLAN_VIDS 15
struct list_head vlan_vids; /* List of active VLAN IDs */
+
+ bool multi_package; /* Enable multiple packages */
+ u32 package_whitelist; /* Packages to configure */
};
struct ncsi_cmd_arg {
@@ -341,6 +353,7 @@ extern spinlock_t ncsi_dev_lock;
list_for_each_entry_rcu(nc, &np->channels, node)
/* Resources */
+int ncsi_reset_dev(struct ncsi_dev *nd);
void ncsi_start_channel_monitor(struct ncsi_channel *nc);
void ncsi_stop_channel_monitor(struct ncsi_channel *nc);
struct ncsi_channel *ncsi_find_channel(struct ncsi_package *np,
@@ -361,6 +374,13 @@ struct ncsi_request *ncsi_alloc_request(struct ncsi_dev_priv *ndp,
void ncsi_free_request(struct ncsi_request *nr);
struct ncsi_dev *ncsi_find_dev(struct net_device *dev);
int ncsi_process_next_channel(struct ncsi_dev_priv *ndp);
+bool ncsi_channel_has_link(struct ncsi_channel *channel);
+bool ncsi_channel_is_last(struct ncsi_dev_priv *ndp,
+ struct ncsi_channel *channel);
+int ncsi_update_tx_channel(struct ncsi_dev_priv *ndp,
+ struct ncsi_package *np,
+ struct ncsi_channel *disable,
+ struct ncsi_channel *enable);
/* Packet handlers */
u32 ncsi_calculate_checksum(unsigned char *data, int len);
diff --git a/net/ncsi/ncsi-aen.c b/net/ncsi/ncsi-aen.c
index 25e483e8278b..26d67e27551f 100644
--- a/net/ncsi/ncsi-aen.c
+++ b/net/ncsi/ncsi-aen.c
@@ -50,13 +50,15 @@ static int ncsi_validate_aen_pkt(struct ncsi_aen_pkt_hdr *h,
static int ncsi_aen_handler_lsc(struct ncsi_dev_priv *ndp,
struct ncsi_aen_pkt_hdr *h)
{
- struct ncsi_aen_lsc_pkt *lsc;
- struct ncsi_channel *nc;
+ struct ncsi_channel *nc, *tmp;
struct ncsi_channel_mode *ncm;
- bool chained;
- int state;
unsigned long old_data, data;
+ struct ncsi_aen_lsc_pkt *lsc;
+ struct ncsi_package *np;
+ bool had_link, has_link;
unsigned long flags;
+ bool chained;
+ int state;
/* Find the NCSI channel */
ncsi_find_package_and_channel(ndp, h->common.channel, NULL, &nc);
@@ -73,6 +75,9 @@ static int ncsi_aen_handler_lsc(struct ncsi_dev_priv *ndp,
ncm->data[2] = data;
ncm->data[4] = ntohl(lsc->oem_status);
+ had_link = !!(old_data & 0x1);
+ has_link = !!(data & 0x1);
+
netdev_dbg(ndp->ndev.dev, "NCSI: LSC AEN - channel %u state %s\n",
nc->id, data & 0x1 ? "up" : "down");
@@ -80,22 +85,60 @@ static int ncsi_aen_handler_lsc(struct ncsi_dev_priv *ndp,
state = nc->state;
spin_unlock_irqrestore(&nc->lock, flags);
- if (!((old_data ^ data) & 0x1) || chained)
- return 0;
- if (!(state == NCSI_CHANNEL_INACTIVE && (data & 0x1)) &&
- !(state == NCSI_CHANNEL_ACTIVE && !(data & 0x1)))
+ if (state == NCSI_CHANNEL_INACTIVE)
+ netdev_warn(ndp->ndev.dev,
+ "NCSI: Inactive channel %u received AEN!\n",
+ nc->id);
+
+ if ((had_link == has_link) || chained)
return 0;
- if (!(ndp->flags & NCSI_DEV_HWA) &&
- state == NCSI_CHANNEL_ACTIVE)
- ndp->flags |= NCSI_DEV_RESHUFFLE;
+ if (!ndp->multi_package && !nc->package->multi_channel) {
+ if (had_link) {
+ ndp->flags |= NCSI_DEV_RESHUFFLE;
+ ncsi_stop_channel_monitor(nc);
+ spin_lock_irqsave(&ndp->lock, flags);
+ list_add_tail_rcu(&nc->link, &ndp->channel_queue);
+ spin_unlock_irqrestore(&ndp->lock, flags);
+ return ncsi_process_next_channel(ndp);
+ }
+ /* Configured channel came up */
+ return 0;
+ }
- ncsi_stop_channel_monitor(nc);
- spin_lock_irqsave(&ndp->lock, flags);
- list_add_tail_rcu(&nc->link, &ndp->channel_queue);
- spin_unlock_irqrestore(&ndp->lock, flags);
+ if (had_link) {
+ ncm = &nc->modes[NCSI_MODE_TX_ENABLE];
+ if (ncsi_channel_is_last(ndp, nc)) {
+ /* No channels left, reconfigure */
+ return ncsi_reset_dev(&ndp->ndev);
+ } else if (ncm->enable) {
+ /* Need to failover Tx channel */
+ ncsi_update_tx_channel(ndp, nc->package, nc, NULL);
+ }
+ } else if (has_link && nc->package->preferred_channel == nc) {
+ /* Return Tx to preferred channel */
+ ncsi_update_tx_channel(ndp, nc->package, NULL, nc);
+ } else if (has_link) {
+ NCSI_FOR_EACH_PACKAGE(ndp, np) {
+ NCSI_FOR_EACH_CHANNEL(np, tmp) {
+ /* Enable Tx on this channel if the current Tx
+ * channel is down.
+ */
+ ncm = &tmp->modes[NCSI_MODE_TX_ENABLE];
+ if (ncm->enable &&
+ !ncsi_channel_has_link(tmp)) {
+ ncsi_update_tx_channel(ndp, nc->package,
+ tmp, nc);
+ break;
+ }
+ }
+ }
+ }
- return ncsi_process_next_channel(ndp);
+ /* Leave configured channels active in a multi-channel scenario so
+ * AEN events are still received.
+ */
+ return 0;
}
static int ncsi_aen_handler_cr(struct ncsi_dev_priv *ndp,
diff --git a/net/ncsi/ncsi-manage.c b/net/ncsi/ncsi-manage.c
index bfc43b28c7a6..31359d5e14ad 100644
--- a/net/ncsi/ncsi-manage.c
+++ b/net/ncsi/ncsi-manage.c
@@ -28,6 +28,29 @@
LIST_HEAD(ncsi_dev_list);
DEFINE_SPINLOCK(ncsi_dev_lock);
+bool ncsi_channel_has_link(struct ncsi_channel *channel)
+{
+ return !!(channel->modes[NCSI_MODE_LINK].data[2] & 0x1);
+}
+
+bool ncsi_channel_is_last(struct ncsi_dev_priv *ndp,
+ struct ncsi_channel *channel)
+{
+ struct ncsi_package *np;
+ struct ncsi_channel *nc;
+
+ NCSI_FOR_EACH_PACKAGE(ndp, np)
+ NCSI_FOR_EACH_CHANNEL(np, nc) {
+ if (nc == channel)
+ continue;
+ if (nc->state == NCSI_CHANNEL_ACTIVE &&
+ ncsi_channel_has_link(nc))
+ return false;
+ }
+
+ return true;
+}
+
static void ncsi_report_link(struct ncsi_dev_priv *ndp, bool force_down)
{
struct ncsi_dev *nd = &ndp->ndev;
@@ -52,7 +75,7 @@ static void ncsi_report_link(struct ncsi_dev_priv *ndp, bool force_down)
continue;
}
- if (nc->modes[NCSI_MODE_LINK].data[2] & 0x1) {
+ if (ncsi_channel_has_link(nc)) {
spin_unlock_irqrestore(&nc->lock, flags);
nd->link_up = 1;
goto report;
@@ -113,10 +136,8 @@ static void ncsi_channel_monitor(struct timer_list *t)
default:
netdev_err(ndp->ndev.dev, "NCSI Channel %d timed out!\n",
nc->id);
- if (!(ndp->flags & NCSI_DEV_HWA)) {
- ncsi_report_link(ndp, true);
- ndp->flags |= NCSI_DEV_RESHUFFLE;
- }
+ ncsi_report_link(ndp, true);
+ ndp->flags |= NCSI_DEV_RESHUFFLE;
ncsi_stop_channel_monitor(nc);
@@ -269,6 +290,7 @@ struct ncsi_package *ncsi_add_package(struct ncsi_dev_priv *ndp,
np->ndp = ndp;
spin_lock_init(&np->lock);
INIT_LIST_HEAD(&np->channels);
+ np->channel_whitelist = UINT_MAX;
spin_lock_irqsave(&ndp->lock, flags);
tmp = ncsi_find_package(ndp, id);
@@ -442,12 +464,14 @@ static void ncsi_request_timeout(struct timer_list *t)
static void ncsi_suspend_channel(struct ncsi_dev_priv *ndp)
{
struct ncsi_dev *nd = &ndp->ndev;
- struct ncsi_package *np = ndp->active_package;
- struct ncsi_channel *nc = ndp->active_channel;
+ struct ncsi_package *np;
+ struct ncsi_channel *nc, *tmp;
struct ncsi_cmd_arg nca;
unsigned long flags;
int ret;
+ np = ndp->active_package;
+ nc = ndp->active_channel;
nca.ndp = ndp;
nca.req_flags = NCSI_REQ_FLAG_EVENT_DRIVEN;
switch (nd->state) {
@@ -523,6 +547,15 @@ static void ncsi_suspend_channel(struct ncsi_dev_priv *ndp)
if (ret)
goto error;
+ NCSI_FOR_EACH_CHANNEL(np, tmp) {
+ /* If there is another channel active on this package
+ * do not deselect the package.
+ */
+ if (tmp != nc && tmp->state == NCSI_CHANNEL_ACTIVE) {
+ nd->state = ncsi_dev_state_suspend_done;
+ break;
+ }
+ }
break;
case ncsi_dev_state_suspend_deselect:
ndp->pending_req_num = 1;
@@ -541,8 +574,10 @@ static void ncsi_suspend_channel(struct ncsi_dev_priv *ndp)
spin_lock_irqsave(&nc->lock, flags);
nc->state = NCSI_CHANNEL_INACTIVE;
spin_unlock_irqrestore(&nc->lock, flags);
- ncsi_process_next_channel(ndp);
-
+ if (ndp->flags & NCSI_DEV_RESET)
+ ncsi_reset_dev(nd);
+ else
+ ncsi_process_next_channel(ndp);
break;
default:
netdev_warn(nd->dev, "Wrong NCSI state 0x%x in suspend\n",
@@ -675,12 +710,38 @@ static int ncsi_oem_gma_handler_bcm(struct ncsi_cmd_arg *nca)
return ret;
}
+static int ncsi_oem_gma_handler_mlx(struct ncsi_cmd_arg *nca)
+{
+ union {
+ u8 data_u8[NCSI_OEM_MLX_CMD_GMA_LEN];
+ u32 data_u32[NCSI_OEM_MLX_CMD_GMA_LEN / sizeof(u32)];
+ } u;
+ int ret = 0;
+
+ nca->payload = NCSI_OEM_MLX_CMD_GMA_LEN;
+
+ memset(&u, 0, sizeof(u));
+ u.data_u32[0] = ntohl(NCSI_OEM_MFR_MLX_ID);
+ u.data_u8[5] = NCSI_OEM_MLX_CMD_GMA;
+ u.data_u8[6] = NCSI_OEM_MLX_CMD_GMA_PARAM;
+
+ nca->data = u.data_u8;
+
+ ret = ncsi_xmit_cmd(nca);
+ if (ret)
+ netdev_err(nca->ndp->ndev.dev,
+ "NCSI: Failed to transmit cmd 0x%x during configure\n",
+ nca->type);
+ return ret;
+}
+
/* OEM Command handlers initialization */
static struct ncsi_oem_gma_handler {
unsigned int mfr_id;
int (*handler)(struct ncsi_cmd_arg *nca);
} ncsi_oem_gma_handlers[] = {
- { NCSI_OEM_MFR_BCM_ID, ncsi_oem_gma_handler_bcm }
+ { NCSI_OEM_MFR_BCM_ID, ncsi_oem_gma_handler_bcm },
+ { NCSI_OEM_MFR_MLX_ID, ncsi_oem_gma_handler_mlx }
};
static int ncsi_gma_handler(struct ncsi_cmd_arg *nca, unsigned int mf_id)
@@ -717,13 +778,144 @@ static int ncsi_gma_handler(struct ncsi_cmd_arg *nca, unsigned int mf_id)
#endif /* CONFIG_NCSI_OEM_CMD_GET_MAC */
+/* Determine if a given channel from the channel_queue should be used for Tx */
+static bool ncsi_channel_is_tx(struct ncsi_dev_priv *ndp,
+ struct ncsi_channel *nc)
+{
+ struct ncsi_channel_mode *ncm;
+ struct ncsi_channel *channel;
+ struct ncsi_package *np;
+
+ /* Check if any other channel has Tx enabled; a channel may have already
+ * been configured and removed from the channel queue.
+ */
+ NCSI_FOR_EACH_PACKAGE(ndp, np) {
+ if (!ndp->multi_package && np != nc->package)
+ continue;
+ NCSI_FOR_EACH_CHANNEL(np, channel) {
+ ncm = &channel->modes[NCSI_MODE_TX_ENABLE];
+ if (ncm->enable)
+ return false;
+ }
+ }
+
+ /* This channel is the preferred channel and has link */
+ list_for_each_entry_rcu(channel, &ndp->channel_queue, link) {
+ np = channel->package;
+ if (np->preferred_channel &&
+ ncsi_channel_has_link(np->preferred_channel)) {
+ return np->preferred_channel == nc;
+ }
+ }
+
+ /* This channel has link */
+ if (ncsi_channel_has_link(nc))
+ return true;
+
+ list_for_each_entry_rcu(channel, &ndp->channel_queue, link)
+ if (ncsi_channel_has_link(channel))
+ return false;
+
+ /* No other channel has link; default to this one */
+ return true;
+}
+
+/* Change the active Tx channel in a multi-channel setup */
+int ncsi_update_tx_channel(struct ncsi_dev_priv *ndp,
+ struct ncsi_package *package,
+ struct ncsi_channel *disable,
+ struct ncsi_channel *enable)
+{
+ struct ncsi_cmd_arg nca;
+ struct ncsi_channel *nc;
+ struct ncsi_package *np;
+ int ret = 0;
+
+ if (!package->multi_channel && !ndp->multi_package)
+ netdev_warn(ndp->ndev.dev,
+ "NCSI: Trying to update Tx channel in single-channel mode\n");
+ nca.ndp = ndp;
+ nca.req_flags = 0;
+
+ /* Find current channel with Tx enabled */
+ NCSI_FOR_EACH_PACKAGE(ndp, np) {
+ if (disable)
+ break;
+ if (!ndp->multi_package && np != package)
+ continue;
+
+ NCSI_FOR_EACH_CHANNEL(np, nc)
+ if (nc->modes[NCSI_MODE_TX_ENABLE].enable) {
+ disable = nc;
+ break;
+ }
+ }
+
+ /* Find a suitable channel for Tx */
+ NCSI_FOR_EACH_PACKAGE(ndp, np) {
+ if (enable)
+ break;
+ if (!ndp->multi_package && np != package)
+ continue;
+ if (!(ndp->package_whitelist & (0x1 << np->id)))
+ continue;
+
+ if (np->preferred_channel &&
+ ncsi_channel_has_link(np->preferred_channel)) {
+ enable = np->preferred_channel;
+ break;
+ }
+
+ NCSI_FOR_EACH_CHANNEL(np, nc) {
+ if (!(np->channel_whitelist & 0x1 << nc->id))
+ continue;
+ if (nc->state != NCSI_CHANNEL_ACTIVE)
+ continue;
+ if (ncsi_channel_has_link(nc)) {
+ enable = nc;
+ break;
+ }
+ }
+ }
+
+ if (disable == enable)
+ return -1;
+
+ if (!enable)
+ return -1;
+
+ if (disable) {
+ nca.channel = disable->id;
+ nca.package = disable->package->id;
+ nca.type = NCSI_PKT_CMD_DCNT;
+ ret = ncsi_xmit_cmd(&nca);
+ if (ret)
+ netdev_err(ndp->ndev.dev,
+ "Error %d sending DCNT\n",
+ ret);
+ }
+
+ netdev_info(ndp->ndev.dev, "NCSI: channel %u enables Tx\n", enable->id);
+
+ nca.channel = enable->id;
+ nca.package = enable->package->id;
+ nca.type = NCSI_PKT_CMD_ECNT;
+ ret = ncsi_xmit_cmd(&nca);
+ if (ret)
+ netdev_err(ndp->ndev.dev,
+ "Error %d sending ECNT\n",
+ ret);
+
+ return ret;
+}
+
static void ncsi_configure_channel(struct ncsi_dev_priv *ndp)
{
- struct ncsi_dev *nd = &ndp->ndev;
- struct net_device *dev = nd->dev;
struct ncsi_package *np = ndp->active_package;
struct ncsi_channel *nc = ndp->active_channel;
struct ncsi_channel *hot_nc = NULL;
+ struct ncsi_dev *nd = &ndp->ndev;
+ struct net_device *dev = nd->dev;
struct ncsi_cmd_arg nca;
unsigned char index;
unsigned long flags;
@@ -845,20 +1037,29 @@ static void ncsi_configure_channel(struct ncsi_dev_priv *ndp)
} else if (nd->state == ncsi_dev_state_config_ebf) {
nca.type = NCSI_PKT_CMD_EBF;
nca.dwords[0] = nc->caps[NCSI_CAP_BC].cap;
- nd->state = ncsi_dev_state_config_ecnt;
+ if (ncsi_channel_is_tx(ndp, nc))
+ nd->state = ncsi_dev_state_config_ecnt;
+ else
+ nd->state = ncsi_dev_state_config_ec;
#if IS_ENABLED(CONFIG_IPV6)
if (ndp->inet6_addr_num > 0 &&
(nc->caps[NCSI_CAP_GENERIC].cap &
NCSI_CAP_GENERIC_MC))
nd->state = ncsi_dev_state_config_egmf;
- else
- nd->state = ncsi_dev_state_config_ecnt;
} else if (nd->state == ncsi_dev_state_config_egmf) {
nca.type = NCSI_PKT_CMD_EGMF;
nca.dwords[0] = nc->caps[NCSI_CAP_MC].cap;
- nd->state = ncsi_dev_state_config_ecnt;
+ if (ncsi_channel_is_tx(ndp, nc))
+ nd->state = ncsi_dev_state_config_ecnt;
+ else
+ nd->state = ncsi_dev_state_config_ec;
#endif /* CONFIG_IPV6 */
} else if (nd->state == ncsi_dev_state_config_ecnt) {
+ if (np->preferred_channel &&
+ nc != np->preferred_channel)
+ netdev_info(ndp->ndev.dev,
+ "NCSI: Tx failed over to channel %u\n",
+ nc->id);
nca.type = NCSI_PKT_CMD_ECNT;
nd->state = ncsi_dev_state_config_ec;
} else if (nd->state == ncsi_dev_state_config_ec) {
@@ -889,6 +1090,16 @@ static void ncsi_configure_channel(struct ncsi_dev_priv *ndp)
netdev_dbg(ndp->ndev.dev, "NCSI: channel %u config done\n",
nc->id);
spin_lock_irqsave(&nc->lock, flags);
+ nc->state = NCSI_CHANNEL_ACTIVE;
+
+ if (ndp->flags & NCSI_DEV_RESET) {
+ /* A reset event happened during config, start it now */
+ nc->reconfigure_needed = false;
+ spin_unlock_irqrestore(&nc->lock, flags);
+ ncsi_reset_dev(nd);
+ break;
+ }
+
if (nc->reconfigure_needed) {
/* This channel's configuration has been updated
* part-way during the config state - start the
@@ -909,10 +1120,8 @@ static void ncsi_configure_channel(struct ncsi_dev_priv *ndp)
if (nc->modes[NCSI_MODE_LINK].data[2] & 0x1) {
hot_nc = nc;
- nc->state = NCSI_CHANNEL_ACTIVE;
} else {
hot_nc = NULL;
- nc->state = NCSI_CHANNEL_INACTIVE;
netdev_dbg(ndp->ndev.dev,
"NCSI: channel %u link down after config\n",
nc->id);
@@ -940,43 +1149,35 @@ error:
static int ncsi_choose_active_channel(struct ncsi_dev_priv *ndp)
{
- struct ncsi_package *np, *force_package;
- struct ncsi_channel *nc, *found, *hot_nc, *force_channel;
+ struct ncsi_channel *nc, *found, *hot_nc;
struct ncsi_channel_mode *ncm;
- unsigned long flags;
+ unsigned long flags, cflags;
+ struct ncsi_package *np;
+ bool with_link;
spin_lock_irqsave(&ndp->lock, flags);
hot_nc = ndp->hot_channel;
- force_channel = ndp->force_channel;
- force_package = ndp->force_package;
spin_unlock_irqrestore(&ndp->lock, flags);
- /* Force a specific channel whether or not it has link if we have been
- * configured to do so
- */
- if (force_package && force_channel) {
- found = force_channel;
- ncm = &found->modes[NCSI_MODE_LINK];
- if (!(ncm->data[2] & 0x1))
- netdev_info(ndp->ndev.dev,
- "NCSI: Channel %u forced, but it is link down\n",
- found->id);
- goto out;
- }
-
- /* The search is done once an inactive channel with up
- * link is found.
+ /* By default the search is done once an inactive channel with up
+ * link is found, unless a preferred channel is set.
+ * If multi_package or multi_channel are configured all channels in the
+ * whitelist are added to the channel queue.
*/
found = NULL;
+ with_link = false;
NCSI_FOR_EACH_PACKAGE(ndp, np) {
- if (ndp->force_package && np != ndp->force_package)
+ if (!(ndp->package_whitelist & (0x1 << np->id)))
continue;
NCSI_FOR_EACH_CHANNEL(np, nc) {
- spin_lock_irqsave(&nc->lock, flags);
+ if (!(np->channel_whitelist & (0x1 << nc->id)))
+ continue;
+
+ spin_lock_irqsave(&nc->lock, cflags);
if (!list_empty(&nc->link) ||
nc->state != NCSI_CHANNEL_INACTIVE) {
- spin_unlock_irqrestore(&nc->lock, flags);
+ spin_unlock_irqrestore(&nc->lock, cflags);
continue;
}
@@ -988,32 +1189,49 @@ static int ncsi_choose_active_channel(struct ncsi_dev_priv *ndp)
ncm = &nc->modes[NCSI_MODE_LINK];
if (ncm->data[2] & 0x1) {
- spin_unlock_irqrestore(&nc->lock, flags);
found = nc;
- goto out;
+ with_link = true;
}
- spin_unlock_irqrestore(&nc->lock, flags);
+ /* If multi_channel is enabled configure all valid
+ * channels whether or not they currently have link
+ * so they will have AENs enabled.
+ */
+ if (with_link || np->multi_channel) {
+ spin_lock_irqsave(&ndp->lock, flags);
+ list_add_tail_rcu(&nc->link,
+ &ndp->channel_queue);
+ spin_unlock_irqrestore(&ndp->lock, flags);
+
+ netdev_dbg(ndp->ndev.dev,
+ "NCSI: Channel %u added to queue (link %s)\n",
+ nc->id,
+ ncm->data[2] & 0x1 ? "up" : "down");
+ }
+
+ spin_unlock_irqrestore(&nc->lock, cflags);
+
+ if (with_link && !np->multi_channel)
+ break;
}
+ if (with_link && !ndp->multi_package)
+ break;
}
- if (!found) {
+ if (list_empty(&ndp->channel_queue) && found) {
+ netdev_info(ndp->ndev.dev,
+ "NCSI: No channel with link found, configuring channel %u\n",
+ found->id);
+ spin_lock_irqsave(&ndp->lock, flags);
+ list_add_tail_rcu(&found->link, &ndp->channel_queue);
+ spin_unlock_irqrestore(&ndp->lock, flags);
+ } else if (!found) {
netdev_warn(ndp->ndev.dev,
- "NCSI: No channel found with link\n");
+ "NCSI: No channel found to configure!\n");
ncsi_report_link(ndp, true);
return -ENODEV;
}
- ncm = &found->modes[NCSI_MODE_LINK];
- netdev_dbg(ndp->ndev.dev,
- "NCSI: Channel %u added to queue (link %s)\n",
- found->id, ncm->data[2] & 0x1 ? "up" : "down");
-
-out:
- spin_lock_irqsave(&ndp->lock, flags);
- list_add_tail_rcu(&found->link, &ndp->channel_queue);
- spin_unlock_irqrestore(&ndp->lock, flags);
-
return ncsi_process_next_channel(ndp);
}
@@ -1050,35 +1268,6 @@ static bool ncsi_check_hwa(struct ncsi_dev_priv *ndp)
return false;
}
-static int ncsi_enable_hwa(struct ncsi_dev_priv *ndp)
-{
- struct ncsi_package *np;
- struct ncsi_channel *nc;
- unsigned long flags;
-
- /* Move all available channels to processing queue */
- spin_lock_irqsave(&ndp->lock, flags);
- NCSI_FOR_EACH_PACKAGE(ndp, np) {
- NCSI_FOR_EACH_CHANNEL(np, nc) {
- WARN_ON_ONCE(nc->state != NCSI_CHANNEL_INACTIVE ||
- !list_empty(&nc->link));
- ncsi_stop_channel_monitor(nc);
- list_add_tail_rcu(&nc->link, &ndp->channel_queue);
- }
- }
- spin_unlock_irqrestore(&ndp->lock, flags);
-
- /* We can have no channels in extremely case */
- if (list_empty(&ndp->channel_queue)) {
- netdev_err(ndp->ndev.dev,
- "NCSI: No available channels for HWA\n");
- ncsi_report_link(ndp, false);
- return -ENOENT;
- }
-
- return ncsi_process_next_channel(ndp);
-}
-
static void ncsi_probe_channel(struct ncsi_dev_priv *ndp)
{
struct ncsi_dev *nd = &ndp->ndev;
@@ -1110,70 +1299,28 @@ static void ncsi_probe_channel(struct ncsi_dev_priv *ndp)
nd->state = ncsi_dev_state_probe_package;
break;
case ncsi_dev_state_probe_package:
- ndp->pending_req_num = 16;
+ ndp->pending_req_num = 1;
- /* Select all possible packages */
nca.type = NCSI_PKT_CMD_SP;
nca.bytes[0] = 1;
+ nca.package = ndp->package_probe_id;
nca.channel = NCSI_RESERVED_CHANNEL;
- for (index = 0; index < 8; index++) {
- nca.package = index;
- ret = ncsi_xmit_cmd(&nca);
- if (ret)
- goto error;
- }
-
- /* Disable all possible packages */
- nca.type = NCSI_PKT_CMD_DP;
- for (index = 0; index < 8; index++) {
- nca.package = index;
- ret = ncsi_xmit_cmd(&nca);
- if (ret)
- goto error;
- }
-
+ ret = ncsi_xmit_cmd(&nca);
+ if (ret)
+ goto error;
nd->state = ncsi_dev_state_probe_channel;
break;
case ncsi_dev_state_probe_channel:
- if (!ndp->active_package)
- ndp->active_package = list_first_or_null_rcu(
- &ndp->packages, struct ncsi_package, node);
- else if (list_is_last(&ndp->active_package->node,
- &ndp->packages))
- ndp->active_package = NULL;
- else
- ndp->active_package = list_next_entry(
- ndp->active_package, node);
-
- /* All available packages and channels are enumerated. The
- * enumeration happens for once when the NCSI interface is
- * started. So we need continue to start the interface after
- * the enumeration.
- *
- * We have to choose an active channel before configuring it.
- * Note that we possibly don't have active channel in extreme
- * situation.
- */
+ ndp->active_package = ncsi_find_package(ndp,
+ ndp->package_probe_id);
if (!ndp->active_package) {
- ndp->flags |= NCSI_DEV_PROBED;
- if (ncsi_check_hwa(ndp))
- ncsi_enable_hwa(ndp);
- else
- ncsi_choose_active_channel(ndp);
- return;
+ /* No response */
+ nd->state = ncsi_dev_state_probe_dp;
+ schedule_work(&ndp->work);
+ break;
}
-
- /* Select the active package */
- ndp->pending_req_num = 1;
- nca.type = NCSI_PKT_CMD_SP;
- nca.bytes[0] = 1;
- nca.package = ndp->active_package->id;
- nca.channel = NCSI_RESERVED_CHANNEL;
- ret = ncsi_xmit_cmd(&nca);
- if (ret)
- goto error;
-
nd->state = ncsi_dev_state_probe_cis;
+ schedule_work(&ndp->work);
break;
case ncsi_dev_state_probe_cis:
ndp->pending_req_num = NCSI_RESERVED_CHANNEL;
@@ -1222,22 +1369,35 @@ static void ncsi_probe_channel(struct ncsi_dev_priv *ndp)
case ncsi_dev_state_probe_dp:
ndp->pending_req_num = 1;
- /* Deselect the active package */
+ /* Deselect the current package */
nca.type = NCSI_PKT_CMD_DP;
- nca.package = ndp->active_package->id;
+ nca.package = ndp->package_probe_id;
nca.channel = NCSI_RESERVED_CHANNEL;
ret = ncsi_xmit_cmd(&nca);
if (ret)
goto error;
- /* Scan channels in next package */
- nd->state = ncsi_dev_state_probe_channel;
+ /* Probe next package */
+ ndp->package_probe_id++;
+ if (ndp->package_probe_id >= 8) {
+ /* Probe finished */
+ ndp->flags |= NCSI_DEV_PROBED;
+ break;
+ }
+ nd->state = ncsi_dev_state_probe_package;
+ ndp->active_package = NULL;
break;
default:
netdev_warn(nd->dev, "Wrong NCSI state 0x%0x in enumeration\n",
nd->state);
}
+ if (ndp->flags & NCSI_DEV_PROBED) {
+ /* Check if all packages have HWA support */
+ ncsi_check_hwa(ndp);
+ ncsi_choose_active_channel(ndp);
+ }
+
return;
error:
netdev_err(ndp->ndev.dev,
@@ -1556,6 +1716,7 @@ struct ncsi_dev *ncsi_register_dev(struct net_device *dev,
INIT_LIST_HEAD(&ndp->channel_queue);
INIT_LIST_HEAD(&ndp->vlan_vids);
INIT_WORK(&ndp->work, ncsi_dev_work);
+ ndp->package_whitelist = UINT_MAX;
/* Initialize private NCSI device */
spin_lock_init(&ndp->lock);
@@ -1592,26 +1753,19 @@ EXPORT_SYMBOL_GPL(ncsi_register_dev);
int ncsi_start_dev(struct ncsi_dev *nd)
{
struct ncsi_dev_priv *ndp = TO_NCSI_DEV_PRIV(nd);
- int ret;
if (nd->state != ncsi_dev_state_registered &&
nd->state != ncsi_dev_state_functional)
return -ENOTTY;
if (!(ndp->flags & NCSI_DEV_PROBED)) {
+ ndp->package_probe_id = 0;
nd->state = ncsi_dev_state_probe;
schedule_work(&ndp->work);
return 0;
}
- if (ndp->flags & NCSI_DEV_HWA) {
- netdev_info(ndp->ndev.dev, "NCSI: Enabling HWA mode\n");
- ret = ncsi_enable_hwa(ndp);
- } else {
- ret = ncsi_choose_active_channel(ndp);
- }
-
- return ret;
+ return ncsi_reset_dev(nd);
}
EXPORT_SYMBOL_GPL(ncsi_start_dev);
@@ -1624,7 +1778,10 @@ void ncsi_stop_dev(struct ncsi_dev *nd)
int old_state;
unsigned long flags;
- /* Stop the channel monitor and reset channel's state */
+ /* Stop the channel monitor on any active channels. Don't reset the
+ * channel state so we know which were active when ncsi_start_dev()
+ * is next called.
+ */
NCSI_FOR_EACH_PACKAGE(ndp, np) {
NCSI_FOR_EACH_CHANNEL(np, nc) {
ncsi_stop_channel_monitor(nc);
@@ -1632,7 +1789,6 @@ void ncsi_stop_dev(struct ncsi_dev *nd)
spin_lock_irqsave(&nc->lock, flags);
chained = !list_empty(&nc->link);
old_state = nc->state;
- nc->state = NCSI_CHANNEL_INACTIVE;
spin_unlock_irqrestore(&nc->lock, flags);
WARN_ON_ONCE(chained ||
@@ -1645,6 +1801,92 @@ void ncsi_stop_dev(struct ncsi_dev *nd)
}
EXPORT_SYMBOL_GPL(ncsi_stop_dev);
+int ncsi_reset_dev(struct ncsi_dev *nd)
+{
+ struct ncsi_dev_priv *ndp = TO_NCSI_DEV_PRIV(nd);
+ struct ncsi_channel *nc, *active, *tmp;
+ struct ncsi_package *np;
+ unsigned long flags;
+
+ spin_lock_irqsave(&ndp->lock, flags);
+
+ if (!(ndp->flags & NCSI_DEV_RESET)) {
+ /* Haven't been called yet, check states */
+ switch (nd->state & ncsi_dev_state_major) {
+ case ncsi_dev_state_registered:
+ case ncsi_dev_state_probe:
+ /* Not even probed yet - do nothing */
+ spin_unlock_irqrestore(&ndp->lock, flags);
+ return 0;
+ case ncsi_dev_state_suspend:
+ case ncsi_dev_state_config:
+ /* Wait for the channel to finish its suspend/config
+ * operation; once it finishes it will check for
+ * NCSI_DEV_RESET and reset the state.
+ */
+ ndp->flags |= NCSI_DEV_RESET;
+ spin_unlock_irqrestore(&ndp->lock, flags);
+ return 0;
+ }
+ } else {
+ switch (nd->state) {
+ case ncsi_dev_state_suspend_done:
+ case ncsi_dev_state_config_done:
+ case ncsi_dev_state_functional:
+ /* Ok */
+ break;
+ default:
+ /* Current reset operation happening */
+ spin_unlock_irqrestore(&ndp->lock, flags);
+ return 0;
+ }
+ }
+
+ if (!list_empty(&ndp->channel_queue)) {
+ /* Clear any channel queue we may have interrupted */
+ list_for_each_entry_safe(nc, tmp, &ndp->channel_queue, link)
+ list_del_init(&nc->link);
+ }
+ spin_unlock_irqrestore(&ndp->lock, flags);
+
+ active = NULL;
+ NCSI_FOR_EACH_PACKAGE(ndp, np) {
+ NCSI_FOR_EACH_CHANNEL(np, nc) {
+ spin_lock_irqsave(&nc->lock, flags);
+
+ if (nc->state == NCSI_CHANNEL_ACTIVE) {
+ active = nc;
+ nc->state = NCSI_CHANNEL_INVISIBLE;
+ spin_unlock_irqrestore(&nc->lock, flags);
+ ncsi_stop_channel_monitor(nc);
+ break;
+ }
+
+ spin_unlock_irqrestore(&nc->lock, flags);
+ }
+ if (active)
+ break;
+ }
+
+ if (!active) {
+ /* Done */
+ spin_lock_irqsave(&ndp->lock, flags);
+ ndp->flags &= ~NCSI_DEV_RESET;
+ spin_unlock_irqrestore(&ndp->lock, flags);
+ return ncsi_choose_active_channel(ndp);
+ }
+
+ spin_lock_irqsave(&ndp->lock, flags);
+ ndp->flags |= NCSI_DEV_RESET;
+ ndp->active_channel = active;
+ ndp->active_package = active->package;
+ spin_unlock_irqrestore(&ndp->lock, flags);
+
+ nd->state = ncsi_dev_state_suspend;
+ schedule_work(&ndp->work);
+ return 0;
+}
+
void ncsi_unregister_dev(struct ncsi_dev *nd)
{
struct ncsi_dev_priv *ndp = TO_NCSI_DEV_PRIV(nd);
diff --git a/net/ncsi/ncsi-netlink.c b/net/ncsi/ncsi-netlink.c
index 33314381b4f5..5d782445d2fc 100644
--- a/net/ncsi/ncsi-netlink.c
+++ b/net/ncsi/ncsi-netlink.c
@@ -30,6 +30,9 @@ static const struct nla_policy ncsi_genl_policy[NCSI_ATTR_MAX + 1] = {
[NCSI_ATTR_PACKAGE_ID] = { .type = NLA_U32 },
[NCSI_ATTR_CHANNEL_ID] = { .type = NLA_U32 },
[NCSI_ATTR_DATA] = { .type = NLA_BINARY, .len = 2048 },
+ [NCSI_ATTR_MULTI_FLAG] = { .type = NLA_FLAG },
+ [NCSI_ATTR_PACKAGE_MASK] = { .type = NLA_U32 },
+ [NCSI_ATTR_CHANNEL_MASK] = { .type = NLA_U32 },
};
static struct ncsi_dev_priv *ndp_from_ifindex(struct net *net, u32 ifindex)
@@ -69,7 +72,7 @@ static int ncsi_write_channel_info(struct sk_buff *skb,
nla_put_u32(skb, NCSI_CHANNEL_ATTR_LINK_STATE, m->data[2]);
if (nc->state == NCSI_CHANNEL_ACTIVE)
nla_put_flag(skb, NCSI_CHANNEL_ATTR_ACTIVE);
- if (ndp->force_channel == nc)
+ if (nc == nc->package->preferred_channel)
nla_put_flag(skb, NCSI_CHANNEL_ATTR_FORCED);
nla_put_u32(skb, NCSI_CHANNEL_ATTR_VERSION_MAJOR, nc->version.version);
@@ -114,7 +117,7 @@ static int ncsi_write_package_info(struct sk_buff *skb,
if (!pnest)
return -ENOMEM;
nla_put_u32(skb, NCSI_PKG_ATTR_ID, np->id);
- if (ndp->force_package == np)
+ if ((0x1 << np->id) == ndp->package_whitelist)
nla_put_flag(skb, NCSI_PKG_ATTR_FORCED);
cnest = nla_nest_start(skb, NCSI_PKG_ATTR_CHANNEL_LIST);
if (!cnest) {
@@ -290,49 +293,58 @@ static int ncsi_set_interface_nl(struct sk_buff *msg, struct genl_info *info)
package_id = nla_get_u32(info->attrs[NCSI_ATTR_PACKAGE_ID]);
package = NULL;
- spin_lock_irqsave(&ndp->lock, flags);
-
NCSI_FOR_EACH_PACKAGE(ndp, np)
if (np->id == package_id)
package = np;
if (!package) {
/* The user has set a package that does not exist */
- spin_unlock_irqrestore(&ndp->lock, flags);
return -ERANGE;
}
channel = NULL;
- if (!info->attrs[NCSI_ATTR_CHANNEL_ID]) {
- /* Allow any channel */
- channel_id = NCSI_RESERVED_CHANNEL;
- } else {
+ if (info->attrs[NCSI_ATTR_CHANNEL_ID]) {
channel_id = nla_get_u32(info->attrs[NCSI_ATTR_CHANNEL_ID]);
NCSI_FOR_EACH_CHANNEL(package, nc)
- if (nc->id == channel_id)
+ if (nc->id == channel_id) {
channel = nc;
+ break;
+ }
+ if (!channel) {
+ netdev_info(ndp->ndev.dev,
+ "NCSI: Channel %u does not exist!\n",
+ channel_id);
+ return -ERANGE;
+ }
}
- if (channel_id != NCSI_RESERVED_CHANNEL && !channel) {
- /* The user has set a channel that does not exist on this
- * package
- */
- spin_unlock_irqrestore(&ndp->lock, flags);
- netdev_info(ndp->ndev.dev, "NCSI: Channel %u does not exist!\n",
- channel_id);
- return -ERANGE;
- }
-
- ndp->force_package = package;
- ndp->force_channel = channel;
+ spin_lock_irqsave(&ndp->lock, flags);
+ ndp->package_whitelist = 0x1 << package->id;
+ ndp->multi_package = false;
spin_unlock_irqrestore(&ndp->lock, flags);
- netdev_info(ndp->ndev.dev, "Set package 0x%x, channel 0x%x%s as preferred\n",
- package_id, channel_id,
- channel_id == NCSI_RESERVED_CHANNEL ? " (any)" : "");
+ spin_lock_irqsave(&package->lock, flags);
+ package->multi_channel = false;
+ if (channel) {
+ package->channel_whitelist = 0x1 << channel->id;
+ package->preferred_channel = channel;
+ } else {
+ /* Allow any channel */
+ package->channel_whitelist = UINT_MAX;
+ package->preferred_channel = NULL;
+ }
+ spin_unlock_irqrestore(&package->lock, flags);
+
+ if (channel)
+ netdev_info(ndp->ndev.dev,
+ "Set package 0x%x, channel 0x%x as preferred\n",
+ package_id, channel_id);
+ else
+ netdev_info(ndp->ndev.dev, "Set package 0x%x as preferred\n",
+ package_id);
- /* Bounce the NCSI channel to set changes */
- ncsi_stop_dev(&ndp->ndev);
- ncsi_start_dev(&ndp->ndev);
+ /* Update channel configuration */
+ if (!(ndp->flags & NCSI_DEV_RESET))
+ ncsi_reset_dev(&ndp->ndev);
return 0;
}
@@ -340,6 +352,7 @@ static int ncsi_set_interface_nl(struct sk_buff *msg, struct genl_info *info)
static int ncsi_clear_interface_nl(struct sk_buff *msg, struct genl_info *info)
{
struct ncsi_dev_priv *ndp;
+ struct ncsi_package *np;
unsigned long flags;
if (!info || !info->attrs)
@@ -353,16 +366,24 @@ static int ncsi_clear_interface_nl(struct sk_buff *msg, struct genl_info *info)
if (!ndp)
return -ENODEV;
- /* Clear any override */
+ /* Reset any whitelists and disable multi mode */
spin_lock_irqsave(&ndp->lock, flags);
- ndp->force_package = NULL;
- ndp->force_channel = NULL;
+ ndp->package_whitelist = UINT_MAX;
+ ndp->multi_package = false;
spin_unlock_irqrestore(&ndp->lock, flags);
+
+ NCSI_FOR_EACH_PACKAGE(ndp, np) {
+ spin_lock_irqsave(&np->lock, flags);
+ np->multi_channel = false;
+ np->channel_whitelist = UINT_MAX;
+ np->preferred_channel = NULL;
+ spin_unlock_irqrestore(&np->lock, flags);
+ }
netdev_info(ndp->ndev.dev, "NCSI: Cleared preferred package/channel\n");
- /* Bounce the NCSI channel to set changes */
- ncsi_stop_dev(&ndp->ndev);
- ncsi_start_dev(&ndp->ndev);
+ /* Update channel configuration */
+ if (!(ndp->flags & NCSI_DEV_RESET))
+ ncsi_reset_dev(&ndp->ndev);
return 0;
}
@@ -563,6 +584,138 @@ int ncsi_send_netlink_err(struct net_device *dev,
return nlmsg_unicast(net->genl_sock, skb, snd_portid);
}
+static int ncsi_set_package_mask_nl(struct sk_buff *msg,
+ struct genl_info *info)
+{
+ struct ncsi_dev_priv *ndp;
+ unsigned long flags;
+ int rc;
+
+ if (!info || !info->attrs)
+ return -EINVAL;
+
+ if (!info->attrs[NCSI_ATTR_IFINDEX])
+ return -EINVAL;
+
+ if (!info->attrs[NCSI_ATTR_PACKAGE_MASK])
+ return -EINVAL;
+
+ ndp = ndp_from_ifindex(get_net(sock_net(msg->sk)),
+ nla_get_u32(info->attrs[NCSI_ATTR_IFINDEX]));
+ if (!ndp)
+ return -ENODEV;
+
+ spin_lock_irqsave(&ndp->lock, flags);
+ if (nla_get_flag(info->attrs[NCSI_ATTR_MULTI_FLAG])) {
+ if (ndp->flags & NCSI_DEV_HWA) {
+ ndp->multi_package = true;
+ rc = 0;
+ } else {
+ netdev_err(ndp->ndev.dev,
+ "NCSI: Can't use multiple packages without HWA\n");
+ rc = -EPERM;
+ }
+ } else {
+ ndp->multi_package = false;
+ rc = 0;
+ }
+
+ if (!rc)
+ ndp->package_whitelist =
+ nla_get_u32(info->attrs[NCSI_ATTR_PACKAGE_MASK]);
+ spin_unlock_irqrestore(&ndp->lock, flags);
+
+ if (!rc) {
+ /* Update channel configuration */
+ if (!(ndp->flags & NCSI_DEV_RESET))
+ ncsi_reset_dev(&ndp->ndev);
+ }
+
+ return rc;
+}
+
+static int ncsi_set_channel_mask_nl(struct sk_buff *msg,
+ struct genl_info *info)
+{
+ struct ncsi_package *np, *package;
+ struct ncsi_channel *nc, *channel;
+ u32 package_id, channel_id;
+ struct ncsi_dev_priv *ndp;
+ unsigned long flags;
+
+ if (!info || !info->attrs)
+ return -EINVAL;
+
+ if (!info->attrs[NCSI_ATTR_IFINDEX])
+ return -EINVAL;
+
+ if (!info->attrs[NCSI_ATTR_PACKAGE_ID])
+ return -EINVAL;
+
+ if (!info->attrs[NCSI_ATTR_CHANNEL_MASK])
+ return -EINVAL;
+
+ ndp = ndp_from_ifindex(get_net(sock_net(msg->sk)),
+ nla_get_u32(info->attrs[NCSI_ATTR_IFINDEX]));
+ if (!ndp)
+ return -ENODEV;
+
+ package_id = nla_get_u32(info->attrs[NCSI_ATTR_PACKAGE_ID]);
+ package = NULL;
+ NCSI_FOR_EACH_PACKAGE(ndp, np)
+ if (np->id == package_id) {
+ package = np;
+ break;
+ }
+ if (!package)
+ return -ERANGE;
+
+ spin_lock_irqsave(&package->lock, flags);
+
+ channel = NULL;
+ if (info->attrs[NCSI_ATTR_CHANNEL_ID]) {
+ channel_id = nla_get_u32(info->attrs[NCSI_ATTR_CHANNEL_ID]);
+ NCSI_FOR_EACH_CHANNEL(np, nc)
+ if (nc->id == channel_id) {
+ channel = nc;
+ break;
+ }
+ if (!channel) {
+ spin_unlock_irqrestore(&package->lock, flags);
+ return -ERANGE;
+ }
+ netdev_dbg(ndp->ndev.dev,
+ "NCSI: Channel %u set as preferred channel\n",
+ channel->id);
+ }
+
+ package->channel_whitelist =
+ nla_get_u32(info->attrs[NCSI_ATTR_CHANNEL_MASK]);
+ if (package->channel_whitelist == 0)
+ netdev_dbg(ndp->ndev.dev,
+ "NCSI: Package %u set to all channels disabled\n",
+ package->id);
+
+ package->preferred_channel = channel;
+
+ if (nla_get_flag(info->attrs[NCSI_ATTR_MULTI_FLAG])) {
+ package->multi_channel = true;
+ netdev_info(ndp->ndev.dev,
+ "NCSI: Multi-channel enabled on package %u\n",
+ package_id);
+ } else {
+ package->multi_channel = false;
+ }
+
+ spin_unlock_irqrestore(&package->lock, flags);
+
+ /* Update channel configuration */
+ if (!(ndp->flags & NCSI_DEV_RESET))
+ ncsi_reset_dev(&ndp->ndev);
+
+ return 0;
+}
+
static const struct genl_ops ncsi_ops[] = {
{
.cmd = NCSI_CMD_PKG_INFO,
@@ -589,6 +742,18 @@ static const struct genl_ops ncsi_ops[] = {
.doit = ncsi_send_cmd_nl,
.flags = GENL_ADMIN_PERM,
},
+ {
+ .cmd = NCSI_CMD_SET_PACKAGE_MASK,
+ .policy = ncsi_genl_policy,
+ .doit = ncsi_set_package_mask_nl,
+ .flags = GENL_ADMIN_PERM,
+ },
+ {
+ .cmd = NCSI_CMD_SET_CHANNEL_MASK,
+ .policy = ncsi_genl_policy,
+ .doit = ncsi_set_channel_mask_nl,
+ .flags = GENL_ADMIN_PERM,
+ },
};
static struct genl_family ncsi_genl_family __ro_after_init = {
diff --git a/net/ncsi/ncsi-pkt.h b/net/ncsi/ncsi-pkt.h
index 4d3f06be38bd..2a6d83a596c9 100644
--- a/net/ncsi/ncsi-pkt.h
+++ b/net/ncsi/ncsi-pkt.h
@@ -165,6 +165,15 @@ struct ncsi_rsp_oem_pkt {
unsigned char data[]; /* Payload data */
};
+/* Mellanox Response Data */
+struct ncsi_rsp_oem_mlx_pkt {
+ unsigned char cmd_rev; /* Command Revision */
+ unsigned char cmd; /* Command ID */
+ unsigned char param; /* Parameter */
+ unsigned char optional; /* Optional data */
+ unsigned char data[]; /* Data */
+};
+
/* Broadcom Response Data */
struct ncsi_rsp_oem_bcm_pkt {
unsigned char ver; /* Payload Version */
diff --git a/net/ncsi/ncsi-rsp.c b/net/ncsi/ncsi-rsp.c
index 77e07ba3f493..dc07fcc7938e 100644
--- a/net/ncsi/ncsi-rsp.c
+++ b/net/ncsi/ncsi-rsp.c
@@ -256,7 +256,7 @@ static int ncsi_rsp_handler_dcnt(struct ncsi_request *nr)
if (!nc