diff options
author | Arjan van de Ven <arjan@linux.intel.com> | 2008-10-17 09:20:26 -0700 |
---|---|---|
committer | Arjan van de Ven <arjan@linux.intel.com> | 2008-10-17 09:20:26 -0700 |
commit | 651dab4264e4ba0e563f5ff56f748127246e9065 (patch) | |
tree | 016630974bdcb00fe529b673f96d389e0fd6dc94 /net | |
parent | 40b8606253552109815786e5d4b0de98782d31f5 (diff) | |
parent | 2e532d68a2b3e2aa6b19731501222069735c741c (diff) |
Merge commit 'linus/master' into merge-linus
Conflicts:
arch/x86/kvm/i8254.c
Diffstat (limited to 'net')
416 files changed, 29078 insertions, 13790 deletions
diff --git a/net/802/fc.c b/net/802/fc.c index cb3475ea6fda..34cf1ee014b8 100644 --- a/net/802/fc.c +++ b/net/802/fc.c @@ -82,13 +82,13 @@ static int fc_header(struct sk_buff *skb, struct net_device *dev, static int fc_rebuild_header(struct sk_buff *skb) { +#ifdef CONFIG_INET struct fch_hdr *fch=(struct fch_hdr *)skb->data; struct fcllc *fcllc=(struct fcllc *)(skb->data+sizeof(struct fch_hdr)); if(fcllc->ethertype != htons(ETH_P_IP)) { printk("fc_rebuild_header: Don't know how to resolve type %04X addresses ?\n", ntohs(fcllc->ethertype)); return 0; } -#ifdef CONFIG_INET return arp_find(fch->daddr, skb); #else return 0; diff --git a/net/802/psnap.c b/net/802/psnap.c index b3cfe5a14fca..70980baeb682 100644 --- a/net/802/psnap.c +++ b/net/802/psnap.c @@ -1,7 +1,7 @@ /* * SNAP data link layer. Derived from 802.2 * - * Alan Cox <Alan.Cox@linux.org>, + * Alan Cox <alan@lxorguk.ukuu.org.uk>, * from the 802.2 layer by Greg Page. * Merged in additions from Greg Page's psnap.c. * diff --git a/net/8021q/vlan.c b/net/8021q/vlan.c index b661f47bf10a..f0e335aa20df 100644 --- a/net/8021q/vlan.c +++ b/net/8021q/vlan.c @@ -394,6 +394,7 @@ static void vlan_transfer_features(struct net_device *dev, vlandev->features &= ~dev->vlan_features; vlandev->features |= dev->features & dev->vlan_features; + vlandev->gso_max_size = dev->gso_max_size; if (old_features != vlandev->features) netdev_features_change(vlandev); diff --git a/net/8021q/vlan_dev.c b/net/8021q/vlan_dev.c index 4bf014e51f8c..8883e9c8a223 100644 --- a/net/8021q/vlan_dev.c +++ b/net/8021q/vlan_dev.c @@ -48,7 +48,7 @@ static int vlan_dev_rebuild_header(struct sk_buff *skb) switch (veth->h_vlan_encapsulated_proto) { #ifdef CONFIG_INET - case __constant_htons(ETH_P_IP): + case htons(ETH_P_IP): /* TODO: Confirm this will work with VLAN headers... */ return arp_find(veth->h_dest, skb); @@ -607,6 +607,7 @@ static int vlan_dev_init(struct net_device *dev) (1<<__LINK_STATE_PRESENT); dev->features |= real_dev->features & real_dev->vlan_features; + dev->gso_max_size = real_dev->gso_max_size; /* ipv6 shared card related stuff */ dev->dev_id = real_dev->dev_id; diff --git a/net/9p/client.c b/net/9p/client.c index 2ffe40cf2f01..e053e06028a5 100644 --- a/net/9p/client.c +++ b/net/9p/client.c @@ -52,7 +52,7 @@ enum { Opt_err, }; -static match_table_t tokens = { +static const match_table_t tokens = { {Opt_msize, "msize=%u"}, {Opt_legacy, "noextend"}, {Opt_trans, "trans=%s"}, @@ -75,7 +75,6 @@ static int parse_opts(char *opts, struct p9_client *clnt) int option; int ret = 0; - clnt->trans_mod = v9fs_default_trans(); clnt->dotu = 1; clnt->msize = 8192; @@ -108,7 +107,7 @@ static int parse_opts(char *opts, struct p9_client *clnt) clnt->msize = option; break; case Opt_trans: - clnt->trans_mod = v9fs_match_trans(&args[0]); + clnt->trans_mod = v9fs_get_trans_by_name(&args[0]); break; case Opt_legacy: clnt->dotu = 0; @@ -117,6 +116,10 @@ static int parse_opts(char *opts, struct p9_client *clnt) continue; } } + + if (!clnt->trans_mod) + clnt->trans_mod = v9fs_get_default_trans(); + kfree(options); return ret; } @@ -150,6 +153,7 @@ struct p9_client *p9_client_create(const char *dev_name, char *options) if (!clnt) return ERR_PTR(-ENOMEM); + clnt->trans_mod = NULL; clnt->trans = NULL; spin_lock_init(&clnt->lock); INIT_LIST_HEAD(&clnt->fidlist); @@ -235,6 +239,8 @@ void p9_client_destroy(struct p9_client *clnt) clnt->trans = NULL; } + v9fs_put_trans(clnt->trans_mod); + list_for_each_entry_safe(fid, fidptr, &clnt->fidlist, flist) p9_fid_destroy(fid); diff --git a/net/9p/conv.c b/net/9p/conv.c index 44547201f5bc..5ad3a3bd73b2 100644 --- a/net/9p/conv.c +++ b/net/9p/conv.c @@ -451,8 +451,10 @@ p9_put_data(struct cbuf *bufp, const char *data, int count, unsigned char **pdata) { *pdata = buf_alloc(bufp, count); + if (*pdata == NULL) + return -ENOMEM; memmove(*pdata, data, count); - return count; + return 0; } static int @@ -460,6 +462,8 @@ p9_put_user_data(struct cbuf *bufp, const char __user *data, int count, unsigned char **pdata) { *pdata = buf_alloc(bufp, count); + if (*pdata == NULL) + return -ENOMEM; return copy_from_user(*pdata, data, count); } diff --git a/net/9p/mod.c b/net/9p/mod.c index bdee1fb7cc62..1084feb24cb0 100644 --- a/net/9p/mod.c +++ b/net/9p/mod.c @@ -31,6 +31,7 @@ #include <linux/parser.h> #include <net/9p/transport.h> #include <linux/list.h> +#include <linux/spinlock.h> #ifdef CONFIG_NET_9P_DEBUG unsigned int p9_debug_level = 0; /* feature-rific global debug level */ @@ -44,8 +45,8 @@ MODULE_PARM_DESC(debug, "9P debugging level"); * */ +static DEFINE_SPINLOCK(v9fs_trans_lock); static LIST_HEAD(v9fs_trans_list); -static struct p9_trans_module *v9fs_default_transport; /** * v9fs_register_trans - register a new transport with 9p @@ -54,48 +55,87 @@ static struct p9_trans_module *v9fs_default_transport; */ void v9fs_register_trans(struct p9_trans_module *m) { + spin_lock(&v9fs_trans_lock); list_add_tail(&m->list, &v9fs_trans_list); - if (m->def) - v9fs_default_transport = m; + spin_unlock(&v9fs_trans_lock); } EXPORT_SYMBOL(v9fs_register_trans); /** - * v9fs_match_trans - match transport versus registered transports + * v9fs_unregister_trans - unregister a 9p transport + * @m: the transport to remove + * + */ +void v9fs_unregister_trans(struct p9_trans_module *m) +{ + spin_lock(&v9fs_trans_lock); + list_del_init(&m->list); + spin_unlock(&v9fs_trans_lock); +} +EXPORT_SYMBOL(v9fs_unregister_trans); + +/** + * v9fs_get_trans_by_name - get transport with the matching name * @name: string identifying transport * */ -struct p9_trans_module *v9fs_match_trans(const substring_t *name) +struct p9_trans_module *v9fs_get_trans_by_name(const substring_t *name) { - struct list_head *p; - struct p9_trans_module *t = NULL; - - list_for_each(p, &v9fs_trans_list) { - t = list_entry(p, struct p9_trans_module, list); - if (strncmp(t->name, name->from, name->to-name->from) == 0) - return t; - } - return NULL; + struct p9_trans_module *t, *found = NULL; + + spin_lock(&v9fs_trans_lock); + + list_for_each_entry(t, &v9fs_trans_list, list) + if (strncmp(t->name, name->from, name->to-name->from) == 0 && + try_module_get(t->owner)) { + found = t; + break; + } + + spin_unlock(&v9fs_trans_lock); + return found; } -EXPORT_SYMBOL(v9fs_match_trans); +EXPORT_SYMBOL(v9fs_get_trans_by_name); /** - * v9fs_default_trans - returns pointer to default transport + * v9fs_get_default_trans - get the default transport * */ -struct p9_trans_module *v9fs_default_trans(void) +struct p9_trans_module *v9fs_get_default_trans(void) { - if (v9fs_default_transport) - return v9fs_default_transport; - else if (!list_empty(&v9fs_trans_list)) - return list_first_entry(&v9fs_trans_list, - struct p9_trans_module, list); - else - return NULL; + struct p9_trans_module *t, *found = NULL; + + spin_lock(&v9fs_trans_lock); + + list_for_each_entry(t, &v9fs_trans_list, list) + if (t->def && try_module_get(t->owner)) { + found = t; + break; + } + + if (!found) + list_for_each_entry(t, &v9fs_trans_list, list) + if (try_module_get(t->owner)) { + found = t; + break; + } + + spin_unlock(&v9fs_trans_lock); + return found; } -EXPORT_SYMBOL(v9fs_default_trans); +EXPORT_SYMBOL(v9fs_get_default_trans); +/** + * v9fs_put_trans - put trans + * @m: transport to put + * + */ +void v9fs_put_trans(struct p9_trans_module *m) +{ + if (m) + module_put(m->owner); +} /** * v9fs_init - Initialize module @@ -120,6 +160,8 @@ static int __init init_p9(void) static void __exit exit_p9(void) { printk(KERN_INFO "Unloading 9P2000 support\n"); + + p9_trans_fd_exit(); } module_init(init_p9) diff --git a/net/9p/trans_fd.c b/net/9p/trans_fd.c index cdf137af7adc..6dabbdb66651 100644 --- a/net/9p/trans_fd.c +++ b/net/9p/trans_fd.c @@ -86,7 +86,7 @@ enum { Opt_port, Opt_rfdno, Opt_wfdno, Opt_err, }; -static match_table_t tokens = { +static const match_table_t tokens = { {Opt_port, "port=%u"}, {Opt_rfdno, "rfdno=%u"}, {Opt_wfdno, "wfdno=%u"}, @@ -151,7 +151,6 @@ struct p9_mux_poll_task { * @trans: reference to transport instance for this connection * @tagpool: id accounting for transactions * @err: error state - * @equeue: event wait_q (?) * @req_list: accounting for requests which have been sent * @unsent_req_list: accounting for requests that haven't been sent * @rcall: current response &p9_fcall structure @@ -178,7 +177,6 @@ struct p9_conn { struct p9_trans *trans; struct p9_idpool *tagpool; int err; - wait_queue_head_t equeue; struct list_head req_list; struct list_head unsent_req_list; struct p9_fcall *rcall; @@ -240,22 +238,6 @@ static int p9_conn_rpcnb(struct p9_conn *m, struct p9_fcall *tc, static void p9_conn_cancel(struct p9_conn *m, int err); -static int p9_mux_global_init(void) -{ - int i; - - for (i = 0; i < ARRAY_SIZE(p9_mux_poll_tasks); i++) - p9_mux_poll_tasks[i].task = NULL; - - p9_mux_wq = create_workqueue("v9fs"); - if (!p9_mux_wq) { - printk(KERN_WARNING "v9fs: mux: creating workqueue failed\n"); - return -ENOMEM; - } - - return 0; -} - static u16 p9_mux_get_tag(struct p9_conn *m) { int tag; @@ -409,11 +391,11 @@ static void p9_mux_poll_stop(struct p9_conn *m) static struct p9_conn *p9_conn_create(struct p9_trans *trans) { int i, n; - struct p9_conn *m, *mtmp; + struct p9_conn *m; P9_DPRINTK(P9_DEBUG_MUX, "transport %p msize %d\n", trans, trans->msize); - m = kmalloc(sizeof(struct p9_conn), GFP_KERNEL); + m = kzalloc(sizeof(struct p9_conn), GFP_KERNEL); if (!m) return ERR_PTR(-ENOMEM); @@ -424,25 +406,14 @@ static struct p9_conn *p9_conn_create(struct p9_trans *trans) m->trans = trans; m->tagpool = p9_idpool_create(); if (IS_ERR(m->tagpool)) { - mtmp = ERR_PTR(-ENOMEM); kfree(m); - return mtmp; + return ERR_PTR(-ENOMEM); } - m->err = 0; - init_waitqueue_head(&m->equeue); INIT_LIST_HEAD(&m->req_list); INIT_LIST_HEAD(&m->unsent_req_list); - m->rcall = NULL; - m->rpos = 0; - m->rbuf = NULL; - m->wpos = m->wsize = 0; - m->wbuf = NULL; INIT_WORK(&m->rq, p9_read_work); INIT_WORK(&m->wq, p9_write_work); - m->wsched = 0; - memset(&m->poll_waddr, 0, sizeof(m->poll_waddr)); - m->poll_task = NULL; n = p9_mux_poll_start(m); if (n) { kfree(m); @@ -463,10 +434,8 @@ static struct p9_conn *p9_conn_create(struct p9_trans *trans) for (i = 0; i < ARRAY_SIZE(m->poll_waddr); i++) { if (IS_ERR(m->poll_waddr[i])) { p9_mux_poll_stop(m); - mtmp = (void *)m->poll_waddr; /* the error code */ kfree(m); - m = mtmp; - break; + return (void *)m->poll_waddr; /* the error code */ } } @@ -483,18 +452,13 @@ static void p9_conn_destroy(struct p9_conn *m) { P9_DPRINTK(P9_DEBUG_MUX, "mux %p prev %p next %p\n", m, m->mux_list.prev, m->mux_list.next); - p9_conn_cancel(m, -ECONNRESET); - - if (!list_empty(&m->req_list)) { - /* wait until all processes waiting on this session exit */ - P9_DPRINTK(P9_DEBUG_MUX, - "mux %p waiting for empty request queue\n", m); - wait_event_timeout(m->equeue, (list_empty(&m->req_list)), 5000); - P9_DPRINTK(P9_DEBUG_MUX, "mux %p request queue empty: %d\n", m, - list_empty(&m->req_list)); - } p9_mux_poll_stop(m); + cancel_work_sync(&m->rq); + cancel_work_sync(&m->wq); + + p9_conn_cancel(m, -ECONNRESET); + m->trans = NULL; p9_idpool_destroy(m->tagpool); kfree(m); @@ -840,8 +804,6 @@ static void p9_read_work(struct work_struct *work) (*req->cb) (req, req->cba); else kfree(req->rcall); - - wake_up(&m->equeue); } } else { if (err >= 0 && rcall->id != P9_RFLUSH) @@ -908,8 +870,10 @@ static struct p9_req *p9_send_request(struct p9_conn *m, else n = p9_mux_get_tag(m); - if (n < 0) + if (n < 0) { + kfree(req); return ERR_PTR(-ENOMEM); + } p9_set_tag(tc, n); @@ -984,8 +948,6 @@ static void p9_mux_flush_cb(struct p9_req *freq, void *a) (*req->cb) (req, req->cba); else kfree(req->rcall); - - wake_up(&m->equeue); } kfree(freq->tcall); @@ -1191,8 +1153,6 @@ void p9_conn_cancel(struct p9_conn *m, int err) else kfree(req->rcall); } - - wake_up(&m->equeue); } /** @@ -1370,7 +1330,6 @@ p9_fd_poll(struct p9_trans *trans, struct poll_table_struct *pt) { int ret, n; struct p9_trans_fd *ts = NULL; - mm_segment_t oldfs; if (trans && trans->status == Connected) ts = trans->priv; @@ -1384,24 +1343,17 @@ p9_fd_poll(struct p9_trans *trans, struct poll_table_struct *pt) if (!ts->wr->f_op || !ts->wr->f_op->poll) return -EIO; - oldfs = get_fs(); - set_fs(get_ds()); - ret = ts->rd->f_op->poll(ts->rd, pt); if (ret < 0) - goto end; + return ret; if (ts->rd != ts->wr) { n = ts->wr->f_op->poll(ts->wr, pt); - if (n < 0) { - ret = n; - goto end; - } + if (n < 0) + return n; ret = (ret & ~POLLOUT) | (n & ~POLLIN); } -end: - set_fs(oldfs); return ret; } @@ -1629,6 +1581,7 @@ static struct p9_trans_module p9_tcp_trans = { .maxsize = MAX_SOCK_BUF, .def = 1, .create = p9_trans_create_tcp, + .owner = THIS_MODULE, }; static struct p9_trans_module p9_unix_trans = { @@ -1636,6 +1589,7 @@ static struct p9_trans_module p9_unix_trans = { .maxsize = MAX_SOCK_BUF, .def = 0, .create = p9_trans_create_unix, + .owner = THIS_MODULE, }; static struct p9_trans_module p9_fd_trans = { @@ -1643,14 +1597,20 @@ static struct p9_trans_module p9_fd_trans = { .maxsize = MAX_SOCK_BUF, .def = 0, .create = p9_trans_create_fd, + .owner = THIS_MODULE, }; int p9_trans_fd_init(void) { - int ret = p9_mux_global_init(); - if (ret) { - printk(KERN_WARNING "9p: starting mux failed\n"); - return ret; + int i; + + for (i = 0; i < ARRAY_SIZE(p9_mux_poll_tasks); i++) + p9_mux_poll_tasks[i].task = NULL; + + p9_mux_wq = create_workqueue("v9fs"); + if (!p9_mux_wq) { + printk(KERN_WARNING "v9fs: mux: creating workqueue failed\n"); + return -ENOMEM; } v9fs_register_trans(&p9_tcp_trans); @@ -1659,4 +1619,12 @@ int p9_trans_fd_init(void) return 0; } -EXPORT_SYMBOL(p9_trans_fd_init); + +void p9_trans_fd_exit(void) +{ + v9fs_unregister_trans(&p9_tcp_trans); + v9fs_unregister_trans(&p9_unix_trans); + v9fs_unregister_trans(&p9_fd_trans); + + destroy_workqueue(p9_mux_wq); +} diff --git a/net/9p/trans_virtio.c b/net/9p/trans_virtio.c index 42adc052b149..94912e077a55 100644 --- a/net/9p/trans_virtio.c +++ b/net/9p/trans_virtio.c @@ -528,6 +528,7 @@ static struct p9_trans_module p9_virtio_trans = { .create = p9_virtio_create, .maxsize = PAGE_SIZE*16, .def = 0, + .owner = THIS_MODULE, }; /* The standard init function */ @@ -545,6 +546,7 @@ static int __init p9_virtio_init(void) static void __exit p9_virtio_cleanup(void) { unregister_virtio_driver(&p9_virtio_drv); + v9fs_unregister_trans(&p9_virtio_trans); } module_init(p9_virtio_init); diff --git a/net/Kconfig b/net/Kconfig index 7612cc8c337c..d789d79551ae 100644 --- a/net/Kconfig +++ b/net/Kconfig @@ -180,6 +180,7 @@ source "net/tipc/Kconfig" source "net/atm/Kconfig" source "net/802/Kconfig" source "net/bridge/Kconfig" +source "net/dsa/Kconfig" source "net/8021q/Kconfig" source "net/decnet/Kconfig" source "net/llc/Kconfig" @@ -232,18 +233,23 @@ source "net/can/Kconfig" source "net/irda/Kconfig" source "net/bluetooth/Kconfig" source "net/rxrpc/Kconfig" +source "net/phonet/Kconfig" config FIB_RULES bool -menu "Wireless" +menuconfig WIRELESS + bool "Wireless" depends on !S390 + default y + +if WIRELESS source "net/wireless/Kconfig" source "net/mac80211/Kconfig" source "net/ieee80211/Kconfig" -endmenu +endif # WIRELESS source "net/rfkill/Kconfig" source "net/9p/Kconfig" diff --git a/net/Makefile b/net/Makefile index 4f43e7f874f3..27d1f10dc0e0 100644 --- a/net/Makefile +++ b/net/Makefile @@ -26,6 +26,7 @@ obj-$(CONFIG_PACKET) += packet/ obj-$(CONFIG_NET_KEY) += key/ obj-$(CONFIG_NET_SCHED) += sched/ obj-$(CONFIG_BRIDGE) += bridge/ +obj-$(CONFIG_NET_DSA) += dsa/ obj-$(CONFIG_IPX) += ipx/ obj-$(CONFIG_ATALK) += appletalk/ obj-$(CONFIG_WAN_ROUTER) += wanrouter/ @@ -42,6 +43,7 @@ obj-$(CONFIG_AF_RXRPC) += rxrpc/ obj-$(CONFIG_ATM) += atm/ obj-$(CONFIG_DECNET) += decnet/ obj-$(CONFIG_ECONET) += econet/ +obj-$(CONFIG_PHONET) += phonet/ ifneq ($(CONFIG_VLAN_8021Q),) obj-y += 8021q/ endif diff --git a/net/appletalk/ddp.c b/net/appletalk/ddp.c index 0c850427a85b..d3134e7e6ee8 100644 --- a/net/appletalk/ddp.c +++ b/net/appletalk/ddp.c @@ -2,7 +2,7 @@ * DDP: An implementation of the AppleTalk DDP protocol for * Ethernet 'ELAP'. * - * Alan Cox <Alan.Cox@linux.org> + * Alan Cox <alan@lxorguk.ukuu.org.uk> * * With more than a little assistance from * @@ -1934,6 +1934,6 @@ static void __exit atalk_exit(void) module_exit(atalk_exit); MODULE_LICENSE("GPL"); -MODULE_AUTHOR("Alan Cox <Alan.Cox@linux.org>"); +MODULE_AUTHOR("Alan Cox <alan@lxorguk.ukuu.org.uk>"); MODULE_DESCRIPTION("AppleTalk 0.20\n"); MODULE_ALIAS_NETPROTO(PF_APPLETALK); diff --git a/net/atm/br2684.c b/net/atm/br2684.c index 8d9a6f158880..280de481edc7 100644 --- a/net/atm/br2684.c +++ b/net/atm/br2684.c @@ -375,11 +375,11 @@ static void br2684_push(struct atm_vcc *atmvcc, struct sk_buff *skb) if (memcmp (skb->data + 6, ethertype_ipv6, sizeof(ethertype_ipv6)) == 0) - skb->protocol = __constant_htons(ETH_P_IPV6); + skb->protocol = htons(ETH_P_IPV6); else if (memcmp (skb->data + 6, ethertype_ipv4, sizeof(ethertype_ipv4)) == 0) - skb->protocol = __constant_htons(ETH_P_IP); + skb->protocol = htons(ETH_P_IP); else goto error; skb_pull(skb, sizeof(llc_oui_ipv4)); @@ -404,9 +404,9 @@ static void br2684_push(struct atm_vcc *atmvcc, struct sk_buff *skb) skb_reset_network_header(skb); iph = ip_hdr(skb); if (iph->version == 4) - skb->protocol = __constant_htons(ETH_P_IP); + skb->protocol = htons(ETH_P_IP); else if (iph->version == 6) - skb->protocol = __constant_htons(ETH_P_IPV6); + skb->protocol = htons(ETH_P_IPV6); else goto error; skb->pkt_type = PACKET_HOST; diff --git a/net/atm/lec.c b/net/atm/lec.c index 5799fb52365a..8f701cde5945 100644 --- a/net/atm/lec.c +++ b/net/atm/lec.c @@ -1931,7 +1931,6 @@ static struct atm_vcc *lec_arp_resolve(struct lec_priv *priv, switch (priv->lane_version) { case 1: return priv->mcast_vcc; - break; case 2: /* LANE2 wants arp for multicast addresses */ if (!compare_ether_addr(mac_to_find, bus_mac)) return priv->mcast_vcc; diff --git a/net/ax25/af_ax25.c b/net/ax25/af_ax25.c index 01c83e2a4c19..28c71574a781 100644 --- a/net/ax25/af_ax25.c +++ b/net/ax25/af_ax25.c @@ -317,6 +317,9 @@ void ax25_destroy_socket(ax25_cb *ax25) /* Queue the unaccepted socket for death */ sock_orphan(skb->sk); + /* 9A4GL: hack to release unaccepted sockets */ + skb->sk->sk_state = TCP_LISTEN; + ax25_start_heartbeat(sax25); sax25->state = AX25_STATE_0; } diff --git a/net/ax25/ax25_std_timer.c b/net/ax25/ax25_std_timer.c index cdc7e751ef36..96e4b9273250 100644 --- a/net/ax25/ax25_std_timer.c +++ b/net/ax25/ax25_std_timer.c @@ -39,9 +39,11 @@ void ax25_std_heartbeat_expiry(ax25_cb *ax25) switch (ax25->state) { case AX25_STATE_0: - if (!sk || - sock_flag(sk, SOCK_DESTROY) || - sock_flag(sk, SOCK_DEAD)) { + /* Magic here: If we listen() and a new link dies before it + is accepted() it isn't 'dead' so doesn't get removed. */ + if (!sk || sock_flag(sk, SOCK_DESTROY) || + (sk->sk_state == TCP_LISTEN && + sock_flag(sk, SOCK_DEAD))) { if (sk) { sock_hold(sk); ax25_destroy_socket(ax25); diff --git a/net/bluetooth/af_bluetooth.c b/net/bluetooth/af_bluetooth.c index 1edfdf4c095b..f6348e078aa4 100644 --- a/net/bluetooth/af_bluetooth.c +++ b/net/bluetooth/af_bluetooth.c @@ -49,7 +49,7 @@ #define BT_DBG(D...) #endif -#define VERSION "2.12" +#define VERSION "2.13" /* Bluetooth sockets */ #define BT_MAX_PROTO 8 diff --git a/net/bluetooth/hci_conn.c b/net/bluetooth/hci_conn.c index ca8d05245ca0..b7002429f152 100644 --- a/net/bluetooth/hci_conn.c +++ b/net/bluetooth/hci_conn.c @@ -330,7 +330,7 @@ EXPORT_SYMBOL(hci_get_route); /* Create SCO or ACL connection. * Device _must_ be locked */ -struct hci_conn *hci_connect(struct hci_dev *hdev, int type, bdaddr_t *dst) +struct hci_conn *hci_connect(struct hci_dev *hdev, int type, bdaddr_t *dst, __u8 auth_type) { struct hci_conn *acl; struct hci_conn *sco; @@ -344,8 +344,10 @@ struct hci_conn *hci_connect(struct hci_dev *hdev, int type, bdaddr_t *dst) hci_conn_hold(acl); - if (acl->state == BT_OPEN || acl->state == BT_CLOSED) + if (acl->state == BT_OPEN || acl->state == BT_CLOSED) { + acl->auth_type = auth_type; hci_acl_connect(acl); + } if (type == ACL_LINK) return acl; @@ -374,6 +376,19 @@ struct hci_conn *hci_connect(struct hci_dev *hdev, int type, bdaddr_t *dst) } EXPORT_SYMBOL(hci_connect); +/* Check link security requirement */ +int hci_conn_check_link_mode(struct hci_conn *conn) +{ + BT_DBG("conn %p", conn); + + if (conn->ssp_mode > 0 && conn->hdev->ssp_mode > 0 && + !(conn->link_mode & HCI_LM_ENCRYPT)) + return 0; + + return 1; +} +EXPORT_SYMBOL(hci_conn_check_link_mode); + /* Authenticate remote device */ int hci_conn_auth(struct hci_conn *conn) { @@ -381,7 +396,7 @@ int hci_conn_auth(struct hci_conn *conn) if (conn->ssp_mode > 0 && conn->hdev->ssp_mode > 0) { if (!(conn->auth_type & 0x01)) { - conn->auth_type = HCI_AT_GENERAL_BONDING_MITM; + conn->auth_type |= 0x01; conn->link_mode &= ~HCI_LM_AUTH; } } diff --git a/net/bluetooth/hci_core.c b/net/bluetooth/hci_core.c index f5b21cb93699..278a3ace14f6 100644 --- a/net/bluetooth/hci_core.c +++ b/net/bluetooth/hci_core.c @@ -164,6 +164,9 @@ static inline int hci_request(struct hci_dev *hdev, void (*req)(struct hci_dev * { int ret; + if (!test_bit(HCI_UP, &hdev->flags)) + return -ENETDOWN; + /* Serialize all requests */ hci_req_lock(hdev); ret = __hci_request(hdev, req, opt, timeout); diff --git a/net/bluetooth/hci_event.c b/net/bluetooth/hci_event.c index 0e3db289f4be..ad7a553d7713 100644 --- a/net/bluetooth/hci_event.c +++ b/net/bluetooth/hci_event.c @@ -1605,14 +1605,11 @@ static inline void hci_remote_ext_features_evt(struct hci_dev *hdev, struct sk_b if (conn->state == BT_CONFIG) { if (!ev->status && hdev->ssp_mode > 0 && - conn->ssp_mode > 0) { - if (conn->out) { - struct hci_cp_auth_requested cp; - cp.handle = ev->handle; - hci_send_cmd(hdev, - HCI_OP_AUTH_REQUESTED, + conn->ssp_mode > 0 && conn->out) { + struct hci_cp_auth_requested cp; + cp.handle = ev->handle; + hci_send_cmd(hdev, HCI_OP_AUTH_REQUESTED, sizeof(cp), &cp); - } } else { conn->state = BT_CONNECTED; hci_proto_connect_cfm(conn, ev->status); diff --git a/net/bluetooth/hidp/core.c b/net/bluetooth/hidp/core.c index 96434d774c84..acdeab3d9807 100644 --- a/net/bluetooth/hidp/core.c +++ b/net/bluetooth/hidp/core.c @@ -578,7 +578,7 @@ static int hidp_session(void *arg) if (session->hid) { if (session->hid->claimed & HID_CLAIMED_INPUT) hidinput_disconnect(session->hid); - hid_free_device(session->hid); + hid_destroy_device(session->hid); } /* Wakeup user-space polling for socket errors */ @@ -623,9 +623,15 @@ static struct device *hidp_get_device(struct hidp_session *session) static int hidp_setup_input(struct hidp_session *session, struct hidp_connadd_req *req) { - struct input_dev *input = session->input; + struct input_dev *input; int i; + input = input_allocate_device(); + if (!input) + return -ENOMEM; + + session->input = input; + input_set_drvdata(input, session); input->name = "Bluetooth HID Boot Protocol Device"; @@ -677,67 +683,114 @@ static void hidp_close(struct hid_device *hid) { } -static const struct { - __u16 idVendor; - __u16 idProduct; - unsigned quirks; -} hidp_blacklist[] = { - /* Apple wireless Mighty Mouse */ - { 0x05ac, 0x030c, HID_QUIRK_MIGHTYMOUSE | HID_QUIRK_INVERT_HWHEEL }, +static int hidp_parse(struct hid_device *hid) +{ + struct hidp_session *session = hid->driver_data; + struct hidp_connadd_req *req = session->req; + unsigned char *buf; + int ret; - { } /* Terminating entry */ -}; + buf = kmalloc(req->rd_size, GFP_KERNEL); + if (!buf) + return -ENOMEM; + + if (copy_from_user(buf, req->rd_data, req->rd_size)) { + kfree(buf); + return -EFAULT; + } + + ret = hid_parse_report(session->hid, buf, req->rd_size); + + kfree(buf); + + if (ret) + return ret; + + session->req = NULL; + + return 0; +} + +static int hidp_start(struct hid_device *hid) +{ + struct hidp_session *session = hid->driver_data; + struct hid_report *report; -static void hidp_setup_quirks(struct hid_device *hid) + list_for_each_entry(report, &hid->report_enum[HID_INPUT_REPORT]. + report_list, list) + hidp_send_report(session, report); + + list_for_each_entry(report, &hid->report_enum[HID_FEATURE_REPORT]. + report_list, list) + hidp_send_report(session, report); + + return 0; +} + +static void hidp_stop(struct hid_device *hid) { - unsigned int n; + struct hidp_session *session = hid->driver_data; + + skb_queue_purge(&session->ctrl_transmit); + skb_queue_purge(&session->intr_transmit); - for (n = 0; hidp_blacklist[n].idVendor; n++) - if (hidp_blacklist[n].idVendor == le16_to_cpu(hid->vendor) && - hidp_blacklist[n].idProduct == le16_to_cpu(hid->product)) - hid->quirks = hidp_blacklist[n].quirks; + if (hid->claimed & HID_CLAIMED_INPUT) + hidinput_disconnect(hid); + hid->claimed = 0; } -static void hidp_setup_hid(struct hidp_session *session, +static struct hid_ll_driver hidp_hid_driver = { + .parse = hidp_parse, + .start = hidp_start, + .stop = hidp_stop, + .open = hidp_open, + .close = hidp_close, + .hidinput_input_event = hidp_hidinput_event, +}; + +static int hidp_setup_hid(struct hidp_session *session, struct hidp_connadd_req *req) { - struct hid_device *hid = session->hid; - struct hid_report *report; + struct hid_device *hid; bdaddr_t src, dst; + int ret; - baswap(&src, &bt_sk(session->ctrl_sock->sk)->src); - baswap(&dst, &bt_sk(session->ctrl_sock->sk)->dst); + hid = hid_allocate_device(); + if (IS_ERR(hid)) { + ret = PTR_ERR(session->hid); + goto err; + } + session->hid = hid; + session->req = req; hid->driver_data = session; - hid->country = req->country; + baswap(&src, &bt_sk(session->ctrl_sock->sk)->src); + baswap(&dst, &bt_sk(session->ctrl_sock->sk)->dst); hid->bus = BUS_BLUETOOTH; hid->vendor = req->vendor; hid->product = req->product; hid->version = req->version; + hid->country = req->country; strncpy(hid->name, req->name, 128); strncpy(hid->phys, batostr(&src), 64); strncpy(hid->uniq, batostr(&dst), 64); - hid->dev = hidp_get_device(session); - - hid->hid_open = hidp_open; - hid->hid_close = hidp_close; - - hid->hidinput_input_event = hidp_hidinput_event; + hid->dev.parent = hidp_get_device(session); + hid->ll_driver = &hidp_hid_driver; - hidp_setup_quirks(hid); + ret = hid_add_device(hid); + if (ret) + goto err_hid; - list_for_each_entry(report, &hid->report_enum[HID_INPUT_REPORT].report_list, list) - hidp_send_report(session, report); - - list_for_each_entry(report, &hid->report_enum[HID_FEATURE_REPORT].report_list, list) - hidp_send_report(session, report); - - if (hidinput_connect(hid) == 0) - hid->claimed |= HID_CLAIMED_INPUT; + return 0; +err_hid: + hid_destroy_device(hid); + session->hid = NULL; +err: + return ret; } int hidp_add_connection(struct hidp_connadd_req *req, struct socket *ctrl_sock, struct socket *intr_sock) @@ -757,38 +810,6 @@ int hidp_add_connection(struct hidp_connadd_req *req, struct socket *ctrl_sock, BT_DBG("rd_data %p rd_size %d", req->rd_data, req->rd_size); - if (req->rd_size > 0) { - unsigned char *buf = kmalloc(req->rd_size, GFP_KERNEL); - - if (!buf) { - kfree(session); - return -ENOMEM; - } - - if (copy_from_user(buf, req->rd_data, req->rd_size)) { - kfree(buf); - kfree(session); - return -EFAULT; - } - - session->hid = hid_parse_report(buf, req->rd_size); - - kfree(buf); - - if (!session->hid) { - kfree(session); - return -EINVAL; - } - } - - if (!session->hid) { - session->input = input_allocate_device(); - if (!session->input) { - kfree(session); - return -ENOMEM; - } - } - down_write(&hidp_session_sem); s = __hidp_get_session(&bt_sk(ctrl_sock->sk)->dst); @@ -816,15 +837,18 @@ int hidp_add_connection(struct hidp_connadd_req *req, struct socket *ctrl_sock, session->flags = req->flags & (1 << HIDP_BLUETOOTH_VENDOR_ID); session->idle_to = req->idle_to; - if (session->input) { + if (req->rd_size > 0) { + err = hidp_setup_hid(session, req); + if (err && err != -ENODEV) + goto err_skb; + } + + if (!session->hid) { err = hidp_setup_input(session, req); if (err < 0) - goto failed; + goto err_skb; } - if (session->hid) - hidp_setup_hid(session, req); - __hidp_link_session(session); hidp_set_timer(session); @@ -850,17 +874,16 @@ unlink: __hidp_unlink_session(session); - if (session->input) { + if (session->input) input_unregister_device(session->input); - session->input = NULL; /* don't try to free it here */ - } - + if (session->hid) + hid_destroy_device(session->hid); +err_skb: + skb_queue_purge(&session->ctrl_transmit); + skb_queue_purge(&session->intr_transmit); failed: up_write(&hidp_session_sem); - if (session->hid) - hid_free_device(session->hid); - input_free_device(session->input); kfree(session); return err; @@ -950,18 +973,43 @@ int hidp_get_conninfo(struct hidp_conninfo *ci) return err; } +static const struct hid_device_id hidp_table[] = { + { HID_BLUETOOTH_DEVICE(HID_ANY_ID, HID_ANY_ID) }, + { } +}; + +static struct hid_driver hidp_driver = { + .name = "generic-bluetooth", + .id_table = hidp_table, +}; + static int __init hidp_init(void) { + int ret; + l2cap_load(); BT_INFO("HIDP (Human Interface Emulation) ver %s", VERSION); - return hidp_init_sockets(); + ret = hid_register_driver(&hidp_driver); + if (ret) + goto err; + + ret = hidp_init_sockets(); + if (ret) + goto err_drv; + + return 0; +err_drv: + hid_unregister_driver(&hidp_driver); +err: + return ret; } static void __exit hidp_exit(void) { hidp_cleanup_sockets(); + hid_unregister_driver(&hidp_driver); } module_init(hidp_init); diff --git a/net/bluetooth/hidp/hidp.h b/net/bluetooth/hidp/hidp.h index 343fb0566b3e..e503c89057ad 100644 --- a/net/bluetooth/hidp/hidp.h +++ b/net/bluetooth/hidp/hidp.h @@ -151,6 +151,8 @@ struct hidp_session { struct sk_buff_head ctrl_transmit; struct sk_buff_head intr_transmit; + + struct hidp_connadd_req *req; }; static inline void hidp_schedule(struct hidp_session *session) diff --git a/net/bluetooth/l2cap.c b/net/bluetooth/l2cap.c index 3396d5bdef1c..9610a9c85b98 100644 --- a/net/bluetooth/l2cap.c +++ b/net/bluetooth/l2cap.c @@ -55,7 +55,7 @@ #define BT_DBG(D...) #endif -#define VERSION "2.10" +#define VERSION "2.11" static u32 l2cap_feat_mask = 0x0000; @@ -778,6 +778,7 @@ static int l2cap_do_connect(struct sock *sk) struct l2cap_conn *conn; struct hci_conn *hcon; struct hci_dev *hdev; + __u8 auth_type; int err = 0; BT_DBG("%s -> %s psm 0x%2.2x", batostr(src), batostr(dst), l2cap_pi(sk)->psm); @@ -789,7 +790,21 @@ static int l2cap_do_connect(struct sock *sk) err = -ENOMEM; - hcon = hci_connect(hdev, ACL_LINK, dst); + if (l2cap_pi(sk)->link_mode & L2CAP_LM_AUTH || + l2cap_pi(sk)->link_mode & L2CAP_LM_ENCRYPT || + l2cap_pi(sk)->link_mode & L2CAP_LM_SECURE) { + if (l2cap_pi(sk)->psm == cpu_to_le16(0x0001)) + auth_type = HCI_AT_NO_BONDING_MITM; + else + auth_type = HCI_AT_GENERAL_BONDING_MITM; + } else { + if (l2cap_pi(sk)->psm == cpu_to_le16(0x0001)) + auth_type = HCI_AT_NO_BONDING; + else + auth_type = HCI_AT_GENERAL_BONDING; + } + + hcon = hci_connect(hdev, ACL_LINK, dst, auth_type); if (!hcon) goto done; @@ -1553,10 +1568,10 @@ static inline int l2cap_connect_req(struct l2cap_conn *conn, struct l2cap_cmd_hd struct l2cap_conn_req *req = (struct l2cap_conn_req *) data; struct l2cap_conn_rsp rsp; struct sock *sk, *parent; - int result, status = 0; + int result, status = L2CAP_CS_NO_INFO; u16 dcid = 0, scid = __le16_to_cpu(req->scid); - __le16 psm = req->psm; + __le16 psm = req->psm; BT_DBG("psm 0x%2.2x scid 0x%4.4x", psm, scid); @@ -1567,6 +1582,13 @@ static inline int l2cap_connect_req(struct l2cap_conn *conn, struct l2cap_cmd_hd goto sendresp; } + /* Check if the ACL is secure enough (if not SDP) */ + if (psm != cpu_to_le16(0x0001) && + !hci_conn_check_link_mode(conn->hcon)) { + result = L2CAP_CR_SEC_BLOCK; + goto response; + } + result = L2CAP_CR_NO_MEM; /* Check for backlog size */ @@ -2224,7 +2246,7 @@ static int l2cap_auth_cfm(struct hci_conn *hcon, u8 status) rsp.scid = cpu_to_le16(l2cap_pi(sk)->dcid); rsp.dcid = cpu_to_le16(l2cap_pi(sk)->scid); rsp.result = cpu_to_le16(result); - rsp.status = cpu_to_le16(0); + rsp.status = cpu_to_le16(L2CAP_CS_NO_INFO); l2cap_send_cmd(conn, l2cap_pi(sk)->ident, L2CAP_CONN_RSP, sizeof(rsp), &rsp); } @@ -2296,7 +2318,7 @@ static int l2cap_encrypt_cfm(struct hci_conn *hcon, u8 status, u8 encrypt) rsp.scid = cpu_to_le16(l2cap_pi(sk)->dcid); rsp.dcid = cpu_to_le16(l2cap_pi(sk)->scid); rsp.result = cpu_to_le16(result); - rsp.status = cpu_to_le16(0); + rsp.status = cpu_to_le16(L2CAP_CS_NO_INFO); l2cap_send_cmd(conn, l2cap_pi(sk)->ident, L2CAP_CONN_RSP, sizeof(rsp), &rsp); } diff --git a/net/bluetooth/sco.c b/net/bluetooth/sco.c index a16011fedc1d..0cc91e6da76d 100644 --- a/net/bluetooth/sco.c +++ b/net/bluetooth/sco.c @@ -200,7 +200,7 @@ static int sco_connect(struct sock *sk) else type = SCO_LINK; - hcon = hci_connect(hdev, type, dst); + hcon = hci_connect(hdev, type, dst, HCI_AT_NO_BONDING); if (!hcon) goto done; diff --git a/net/bridge/br.c b/net/bridge/br.c index 573acdf6f9ff..4d2c1f1cb524 100644 --- a/net/bridge/br.c +++ b/net/bridge/br.c @@ -28,6 +28,10 @@ static const struct stp_proto br_stp_proto = { .rcv = br_stp_rcv, }; +static struct pernet_operations br_net_ops = { + .exit = br_net_exit, +}; + static int __init br_init(void) { int err; @@ -42,18 +46,22 @@ static int __init br_init(void) if (err) goto err_out; - err = br_netfilter_init(); + err = register_pernet_subsys(&br_net_ops); if (err) goto err_out1; - err = register_netdevice_notifier(&br_device_notifier); + err = br_netfilter_init(); if (err) goto err_out2; - err = br_netlink_init(); + err = register_netdevice_notifier(&br_device_notifier); if (err) goto err_out3; + err = br_netlink_init(); + if (err) + goto err_out4; + brioctl_set(br_ioctl_deviceless_stub); br_handle_frame_hook = br_handle_frame; @@ -61,10 +69,12 @@ static int __init br_init(void) br_fdb_put_hook = br_fdb_put; return 0; -err_out3: +err_out4: unregister_netdevice_notifier(&br_device_notifier); -err_out2: +err_out3: br_netfilter_fini(); +err_out2: + unregister_pernet_subsys(&br_net_ops); err_out1: br_fdb_fini(); err_out: @@ -80,7 +90,7 @@ static void __exit br_deinit(void) unregister_netdevice_notifier(&br_device_notifier); brioctl_set(NULL); - br_cleanup_bridges(); + unregister_pernet_subsys(&br_net_ops); synchronize_net(); diff --git a/net/bridge/br_device.c b/net/bridge/br_device.c index 4f52c3d50ebe..22ba8632196f 100644 --- a/net/bridge/br_device.c +++ b/net/bridge/br_device.c @@ -178,5 +178,6 @@ void br_dev_setup(struct net_device *dev) dev->priv_flags = IFF_EBRIDGE; dev->features = NETIF_F_SG | NETIF_F_FRAGLIST | NETIF_F_HIGHDMA | - NETIF_F_GSO_MASK | NETIF_F_NO_CSUM | NETIF_F_LLTX; + NETIF_F_GSO_MASK | NETIF_F_NO_CSUM | NETIF_F_LLTX | + NETIF_F_NETNS_LOCAL; } diff --git a/net/bridge/br_if.c b/net/bridge/br_if.c index 63c18aacde8c..573e20f7dba4 100644 --- a/net/bridge/br_if.c +++ b/net/bridge/br_if.c @@ -168,7 +168,7 @@ static void del_br(struct net_bridge *br) unregister_netdevice(br->dev); } -static struct net_device *new_bridge_dev(const char *name) +static struct net_device *new_bridge_dev(struct net *net, const char *name) { struct net_bridge *br; struct net_device *dev; @@ -178,6 +178,7 @@ static struct net_device *new_bridge_dev(const char *name) if (!dev) return NULL; + dev_net_set(dev, net); br = netdev_priv(dev); br->dev = dev; @@ -262,12 +263,12 @@ static struct net_bridge_port *new_nbp(struct net_bridge *br, return p; } -int br_add_bridge(const char *name) +int br_add_bridge(struct net *net, const char *name) { struct net_device *dev; int ret; - dev = new_bridge_dev(name); + dev = new_bridge_dev(net, name); if (!dev) return -ENOMEM; @@ -294,13 +295,13 @@ out_free: goto out; } -int br_del_bridge(const char *name) +int br_del_bridge(struct net *net, const char *name) { struct net_device *dev; int ret = 0; rtnl_lock(); - dev = __dev_get_by_name(&init_net, name); + dev = __dev_get_by_name(net, name); if (dev == NULL) ret = -ENXIO; /* Could not find device */ @@ -445,13 +446,13 @@ int br_del_if(struct net_bridge *br, struct net_device *dev) return 0; } -void __exit br_cleanup_bridges(void) +void br_net_exit(struct net *net) { struct net_device *dev; rtnl_lock(); restart: - for_each_netdev(&init_net, dev) { + for_each_netdev(net, dev) { if (dev->priv_flags & IFF_EBRIDGE) { del_br(dev->priv); goto restart; diff --git a/net/bridge/br_ioctl.c b/net/bridge/br_ioctl.c index eeee218eed80..6a6433daaf27 100644 --- a/net/bridge/br_ioctl.c +++ b/net/bridge/br_ioctl.c @@ -21,12 +21,12 @@ #include "br_private.h" /* called with RTNL */ -static int get_bridge_ifindices(int *indices, int num) +static int get_bridge_ifindices(struct net *net, int *indices, int num) { struct net_device *dev; int i = 0; - for_each_netdev(&init_net, dev) { + for_each_netdev(net, dev) { if (i >= num) break; if (dev->priv_flags & IFF_EBRIDGE) @@ -89,7 +89,7 @@ static int add_del_if(struct net_bridge *br, int ifindex, int isadd) if (!capable(CAP_NET_ADMIN)) return -EPERM; - dev = dev_get_by_index(&init_net, ifindex); + dev = dev_get_by_index(dev_net(br->dev), ifindex); if (dev == NULL) return -EINVAL; @@ -188,15 +188,21 @@ static int old_dev_ioctl(struct net_device *dev, struct ifreq *rq, int cmd) return 0; case BRCTL_SET_BRIDGE_HELLO_TIME: + { + unsigned long t = clock_t_to_jiffies(args[1]); if (!capable(CAP_NET_ADMIN)) return -EPERM; + if (t < HZ) + return -EINVAL; + spin_lock_bh(&br->lock); - br->bridge_hello_time = clock_t_to_jiffies(args[1]); + br->bridge_hello_time = t; if (br_is_root_bridge(br)) br->hello_time = br->bridge_hello_time; spin_unlock_bh(&br->lock); return 0; + } case BRCTL_SET_BRIDGE_MAX_AGE: if (!capable(CAP_NET_ADMIN)) @@ -309,7 +315,7 @@ static int old_dev_ioctl(struct net_device *dev, struct ifreq *rq, int cmd) return -EOPNOTSUPP; } -static int old_deviceless(void __user *uarg) +static int old_deviceless(struct net *net, void __user *uarg) { unsigned long args[3]; @@ -331,7 +337,7 @@ static int old_deviceless(void __user *uarg) if (indices == NULL) return -ENOMEM; - args[2] = get_bridge_ifindices(indices, args[2]); + args[2] = get_bridge_ifindices(net, indices, args[2]); ret = copy_to_user((void __user *)args[1], indices, args[2]*sizeof(int)) ? -EFAULT : args[2]; @@ -354,9 +360,9 @@ static int old_deviceless(void __user *uarg) buf[IFNAMSIZ-1] = 0; if (args[0] == BRCTL_ADD_BRIDGE) - return br_add_bridge(buf); + return br_add_bridge(net, buf); - return br_del_bridge(buf); + return br_del_bridge(net, buf); } } @@ -368,7 +374,7 @@ int br_ioctl_deviceless_stub(struct net *net, unsigned int cmd, void __user *uar switch (cmd) { case SIOCGIFBR: case SIOCSIFBR: - return old_deviceless(uarg); + return old_deviceless(net, uarg); case SIOCBRADDBR: case SIOCBRDELBR: @@ -383,9 +389,9 @@ int br_ioctl_deviceless_stub(struct net *net, unsigned int cmd, void __user *uar buf[IFNAMSIZ-1] = 0; if (cmd == SIOCBRADDBR) - return br_add_bridge(buf); + return br_add_bridge(net, buf); - return br_del_bridge(buf); + return br_del_bridge(net, buf); } } return -EOPNOTSUPP; diff --git a/net/bridge/br_netfilter.c b/net/bridge/br_netfilter.c index 6a9a6cd74b1e..a4abed5b4c44 100644 --- a/net/bridge/br_netfilter.c +++ b/net/bridge/br_netfilter.c @@ -657,7 +657,7 @@ static unsigned int br_nf_forward_ip(unsigned int hook, struct sk_buff *skb, { struct nf_bridge_info *nf_bridge; struct net_device *parent; - int pf; + u_int8_t pf; if (!skb->nf_bridge) return NF_ACCEPT; @@ -791,7 +791,7 @@ static unsigned int br_nf_post_routing(unsigned int hook, struct sk_buff *skb, { struct nf_bridge_info *nf_bridge = skb->nf_bridge; struct net_device *realoutdev = bridge_parent(skb->dev); - int pf; + u_int8_t pf; #ifdef CONFIG_NETFILTER_DEBUG /* Be very paranoid. This probably won't happen anymore, but let's diff --git a/net/bridge/br_netlink.c b/net/bridge/br_netlink.c index f155e6ce8a21..ba7be195803c 100644 --- a/net/bridge/br_netlink.c +++ b/net/bridge/br_netlink.c @@ -82,6 +82,7 @@ nla_put_failure: */ void br_ifinfo_notify(int event, struct net_bridge_port *port) { + struct net *net = dev_net(port->dev); struct sk_buff *skb; int err = -ENOBUFS; @@ -97,10 +98,10 @@ void br_ifinfo_notify(int event, struct net_bridge_port *port) kfree_skb(skb); goto errout; } - err = rtnl_notify(skb, &init_net,0, RTNLGRP_LINK, NULL, GFP_ATOMIC); + err = rtnl_notify(skb, net, 0, RTNLGRP_LINK, NULL, GFP_ATOMIC); errout: if (err < 0) - rtnl_set_sk_err(&init_net, RTNLGRP_LINK, err); + rtnl_set_sk_err(net, RTNLGRP_LINK, err); } /* @@ -112,11 +113,8 @@ static int br_dump_ifinfo(struct sk_buff *skb, struct netlink_callback *cb) struct net_device *dev; int idx; - if (net != &init_net) - return 0; - idx = 0; - for_each_netdev(&init_net, dev) { + for_each_netdev(net, dev) { /* not a bridge port */ if (dev->br_port == NULL || idx < cb->args[0]) goto skip; @@ -147,9 +145,6 @@ static int br_rtm_setlink(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) struct net_bridge_port *p; u8 new_state; - if (net != &init_net) - return -EINVAL; - if (nlmsg_len(nlh) < sizeof(*ifm)) return -EINVAL; @@ -165,7 +160,7 @@ static int br_rtm_setlink(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) if (new_state > BR_STATE_BLOCKING) return -EINVAL; - dev = __dev_get_by_index(&init_net, ifm->ifi_index); + dev = __dev_get_by_index(net, ifm->ifi_index); if (!dev) return -ENODEV; diff --git a/net/bridge/br_notify.c b/net/bridge/br_notify.c index 76340bdd052e..763a3ec292e5 100644 --- a/net/bridge/br_notify.c +++ b/net/bridge/br_notify.c @@ -35,9 +35,6 @@ static int br_device_event(struct notifier_block *unused, unsigned long event, v struct net_bridge_port *p = dev->br_port; struct net_bridge *br; - if (!net_eq(dev_net(dev), &init_net)) - return NOTIFY_DONE; - /* not a port of a bridge */ if (p == NULL) return NOTIFY_DONE; diff --git a/net/bridge/br_private.h b/net/bridge/br_private.h index c3dc18ddc043..b6c3b71974dc 100644 --- a/net/bridge/br_private.h +++ b/net/bridge/br_private.h @@ -178,9 +178,9 @@ extern void br_flood_forward(struct net_bridge *br, struct sk_buff *skb); /* br_if.c */ extern void br_port_carrier_check(struct net_bridge_port *p); -extern int br_add_bridge(const char *name); -extern int br_del_bridge(const char *name); -extern void br_cleanup_bridges(void); +extern int br_add_bridge(struct net *net, const char *name); +extern int br_del_bridge(struct net *net, const char *name); +extern void br_net_exit(struct net *net); extern int br_add_if(struct net_bridge *br, struct net_device *dev); extern int br_del_if(struct net_bridge *br, diff --git a/net/bridge/br_stp_bpdu.c b/net/bridge/br_stp_bpdu.c index 8b200f96f722..81ae40b3f655 100644 --- a/net/bridge/br_stp_bpdu.c +++ b/net/bridge/br_stp_bpdu.c @@ -140,9 +140,6 @@ void br_stp_rcv(const struct stp_proto *proto, struct sk_buff *skb, struct net_bridge *br; const unsigned char *buf; - if (!net_eq(dev_net(dev), &init_net)) - goto err; - if (!p) goto err; diff --git a/net/bridge/br_sysfs_br.c b/net/bridge/br_sysfs_br.c index 27d6a511c8c1..158dee8b4965 100644 --- a/net/bridge/br_sysfs_br.c +++ b/net/bridge/br_sysfs_br.c @@ -29,11 +29,12 @@ */ static ssize_t store_bridge_parm(struct device *d, const char *buf, size_t len, - void (*set)(struct net_bridge *, unsigned long)) + int (*set)(struct net_bridge *, unsigned long)) { struct net_bridge *br = to_bridge(d); char *endp; unsigned long val; + int err; if (!capable(CAP_NET_ADMIN)) return -EPERM; @@ -43,9 +44,9 @@ static ssize_t store_bridge_parm(struct device *d, return -EINVAL; spin_lock_bh(&br->lock); - (*set)(br, val); + err = (*set)(br, val); spin_unlock_bh(&br->lock); - return len; + return err ? err : len; } @@ -56,12 +57,13 @@ static ssize_t show_forward_delay(struct device *d, return sprintf(buf, "%lu\n", jiffies_to_clock_t(br->forward_delay)); } -static void set_forward_delay(struct net_bridge *br, unsigned long val) +static int set_forward_delay(struct net_bridge *br, unsigned long val) { unsigned long delay = clock_t_to_jiffies(val); br->forward_delay = delay; if (br_is_root_bridge(br)) br->bridge_forward_delay = delay; + return 0; } static ssize_t store_forward_delay(struct device *d, @@ -80,12 +82,17 @@ static ssize_t show_hello_time(struct device *d, struct device_attribute *attr, jiffies_to_clock_t(to_bridge(d)->hello_time)); } -static void set_hello_time(struct net_bridge *br, unsigned long val) +static int set_hello_time(struct net_bridge *br, unsigned long val) { unsigned long t = clock_t_to_jiffies(val); + + if (t < HZ) + return -EINVAL; + br->hello_time = t; if (br_is_root_bridge(br)) br->bridge_hello_time = t; + return 0; } static ssize_t store_hello_time(struct device *d, @@ -104,12 +111,13 @@ static ssize_t show_max_age(struct device *d, struct device_attribute *attr, jiffies_to_clock_t(to_bridge(d)->max_age)); } -static void set_max_age(struct net_bridge *br, unsigned long val) +static int set_max_age(struct net_bridge *br, unsigned long val) { unsigned long t = clock_t_to_jiffies(val); br->max_age = t; if (br_is_root_bridge(br)) br->bridge_max_age = t; + return 0; } static ssize_t store_max_age(struct device *d, struct device_attribute *attr, @@ -126,9 +134,10 @@ static ssize_t show_ageing_time(struct device *d, return sprintf(buf, "%lu\n", jiffies_to_clock_t(br->ageing_time)); } -static void set_ageing_time(struct net_bridge *br, unsigned long val) +static int set_ageing_time(struct net_bridge *br, unsigned long val) { br->ageing_time = clock_t_to_jiffies(val); + return 0; } static ssize_t store_ageing_time(struct device *d, @@ -180,9 +189,10 @@ static ssize_t show_priority(struct device *d, struct device_attribute *attr, (br->bridge_id.prio[0] << 8) | br->bridge_id.prio[1]); } -static void set_priority(struct net_bridge *br, unsigned long val) +static int set_priority(struct net_bridge *br, unsigned long val) { br_stp_set_bridge_priority(br, (u16) val); + return 0; } static ssize_t store_priority(struct device *d, struct device_attribute *attr, diff --git a/net/bridge/netfilter/Kconfig b/net/bridge/netfilter/Kconfig index 909479794999..ba6f73eb06c6 100644 --- a/net/bridge/netfilter/Kconfig +++ b/net/bridge/netfilter/Kconfig @@ -2,21 +2,22 @@ # Bridge netfilter configuration # -menu "Bridge: Netfilter Configuration" - depends on BRIDGE && BRIDGE_NETFILTER - -config BRIDGE_NF_EBTABLES +menuconfig BRIDGE_NF_EBTABLES tristate "Ethernet Bridge tables (ebtables) support" + depends on BRIDGE && BRIDGE_NETFILTER + select NETFILTER_XTABLES help ebtables is a general, extensible frame/packet identification framework. Say 'Y' or 'M' here if you want to do Ethernet filtering/NAT/brouting on the Ethernet bridge. + +if BRIDGE_NF_EBTABLES + # # tables # config BRIDGE_EBT_BROUTE tristate "ebt: broute table support" - depends on BRIDGE_NF_EBTABLES help The ebtables broute table is used to define rules that decide between bridging and routing frames, giving Linux the functionality of a @@ -27,7 +28,6 @@ config BRIDGE_EBT_BROUTE config BRIDGE_EBT_T_FILTER tristate "ebt: filter table support" - depends on BRIDGE_NF_EBTABLES help The ebtables filter table is used to define frame filtering rules at local input, forwarding and local output. See the man page for @@ -37,7 +37,6 @@ config BRIDGE_EBT_T_FILTER config BRIDGE_EBT_T_NAT tristate "ebt: nat table support" - depends on BRIDGE_NF_EBTABLES help The ebtables nat table is used to define rules that alter the MAC source address (MAC SNAT) or the MAC destination address (MAC DNAT). @@ -49,7 +48,6 @@ config BRIDGE_EBT_T_NAT # config BRIDGE_EBT_802_3 tristate "ebt: 802.3 filter support" - depends on BRIDGE_NF_EBTABLES help This option adds matching support for 802.3 Ethernet frames. @@ -57,7 +55,6 @@ config BRIDGE_EBT_802_3 config BRIDGE_EBT_AMONG tristate "ebt: among filter support" - depends on BRIDGE_NF_EBTABLES help This option adds the among match, which allows matching the MAC source and/or destination address on a list of addresses. Optionally, @@ -67,7 +64,6 @@ config BRIDGE_EBT_AMONG config BRIDGE_EBT_ARP tristate "ebt: ARP filter support" - depends on BRIDGE_NF_EBTABLES help This option adds the ARP match, which allows ARP and RARP header field filtering. @@ -76,7 +72,6 @@ config BRIDGE_EBT_ARP config BRIDGE_EBT_IP tristate "ebt: IP filter support" - depends on BRIDGE_NF_EBTABLES help This option adds the IP match, which allows basic IP header field filtering. @@ -94,7 +89,6 @@ config BRIDGE_EBT_IP6 config BRIDGE_EBT_LIMIT tristate "ebt: limit match support" - depends on BRIDGE_NF_EBTABLES help This option adds the limit match, which allows you to control the rate at which a rule can be matched. This match is the @@ -105,7 +99,6 @@ config BRIDGE_EBT_LIMIT config BRIDGE_EBT_MARK tristate "ebt: mark filter support" - depends on BRIDGE_NF_EBTABLES help This option adds the mark match, which allows matching frames based on the 'nfmark' value in the frame. This can be set by the mark target. @@ -116,7 +109,6 @@ config BRIDGE_EBT_MARK config BRIDGE_EBT_PKTTYPE tristate "ebt: packet type filter support" - depends on BRIDGE_NF_EBTABLES help This option adds the packet type match, which allows matching on the type of packet based on its Ethernet "class" (as determined by @@ -127,7 +119,6 @@ config BRIDGE_EBT_PKTTYPE config BRIDGE_EBT_STP tristate "ebt: STP filter support" - depends on BRIDGE_NF_EBTABLES help This option adds the Spanning Tree Protocol match, which allows STP header field filtering. @@ -136,7 +127,6 @@ config BRIDGE_EBT_STP config BRIDGE_EBT_VLAN tristate "ebt: 802.1Q VLAN filter support" - depends on BRIDGE_NF_EBTABLES help This option adds the 802.1Q vlan match, which allows the filtering of 802.1Q vlan fields. @@ -156,7 +146,6 @@ config BRIDGE_EBT_ARPREPLY config BRIDGE_EBT_DNAT tristate "ebt: dnat target support" - depends on BRIDGE_NF_EBTABLES help This option adds the MAC DNAT target, which allows altering the MAC destination address of frames. @@ -165,7 +154,6 @@ config BRIDGE_EBT_DNAT config BRIDGE_EBT_MARK_T tristate "ebt: mark target support" - depends on BRIDGE_NF_EBTABLES help This option adds the mark target, which allows marking frames by setting the 'nfmark' value in the frame. @@ -176,7 +164,6 @@ config BRIDGE_EBT_MARK_T config BRIDGE_EBT_REDIRECT tristate "ebt: redirect target support" - depends on BRIDGE_NF_EBTABLES help This option adds the MAC redirect target, which allows altering the MAC destination address of a frame to that of the device it arrived on. @@ -185,7 +172,6 @@ config BRIDGE_EBT_REDIRECT config BRIDGE_EBT_SNAT tristate "ebt: snat target support" - depends on BRIDGE_NF_EBTABLES help This option adds the MAC SNAT target, which allows altering the MAC source address of frames. @@ -196,7 +182,6 @@ config BRIDGE_EBT_SNAT # config BRIDGE_EBT_LOG tristate "ebt: log support" - depends on BRIDGE_NF_EBTABLES help This option adds the log watcher, that you can use in any rule in any ebtables table. It records info about the frame header @@ -206,7 +191,6 @@ config BRIDGE_EBT_LOG config BRIDGE_EBT_ULOG tristate "ebt: ulog support (OBSOLETE)" - depends on BRIDGE_NF_EBTABLES help This option enables the old bridge-specific "ebt_ulog" implementation which has been obsoleted by the new "nfnetlink_log" code (see @@ -223,7 +207,6 @@ config BRIDGE_EBT_ULOG config BRIDGE_EBT_NFLOG tristate "ebt: nflog support" - depends on BRIDGE_NF_EBTABLES help This option enables the nflog watcher, which allows to LOG messages through the netfilter logging API, which can use @@ -235,4 +218,4 @@ config BRIDGE_EBT_NFLOG To compile it as a module, choose M here. If unsure, say N. -endmenu +endif # BRIDGE_NF_EBTABLES diff --git a/net/bridge/netfilter/ebt_802_3.c b/net/bridge/netfilter/ebt_802_3.c index 98534025360f..bd91dc58d49b 100644 --- a/net/bridge/netfilter/ebt_802_3.c +++ b/net/bridge/netfilter/ebt_802_3.c @@ -7,64 +7,63 @@ * May 2003 * */ - +#include <linux/module.h> +#include <linux/netfilter/x_tables.h> #include <linux/netfilter_bridge/ebtables.h> #include <linux/netfilter_bridge/ebt_802_3.h> -#include <linux/module.h> -static int ebt_filter_802_3(const struct sk_buff *skb, const struct net_device *in, - const struct net_device *out, const void *data, unsigned int datalen) +static bool +ebt_802_3_mt(const struct sk_buff *skb, const struct xt_match_param *par) { - const struct ebt_802_3_info *info = data; + const struct ebt_802_3_info *info = par->matchinfo; const struct ebt_802_3_hdr *hdr = ebt_802_3_hdr(skb); __be16 type = hdr->llc.ui.ctrl & IS_UI ? hdr->llc.ui.type : hdr->llc.ni.type; if (info->bitmask & EBT_802_3_SAP) { if (FWINV(info->sap != hdr->llc.ui.ssap, EBT_802_3_SAP)) - return EBT_NOMATCH; + return false; if (FWINV(info->sap != hdr->llc.ui.dsap, EBT_802_3_SAP)) - return EBT_NOMATCH; + return false; } if (info->bitmask & EBT_802_3_TYPE) { if (!(hdr->llc.ui.dsap == CHECK_TYPE && hdr->llc.ui.ssap == CHECK_TYPE)) - return EBT_NOMATCH; + return false; if (FWINV(info->type != type, EBT_802_3_TYPE)) - return EBT_NOMATCH; + return false; } - return EBT_MATCH; + return true; } -static struct ebt_match filter_802_3; -static int ebt_802_3_check(const char *tablename, unsigned int hookmask, - const struct ebt_entry *e, void *data, unsigned int datalen) +static bool ebt_802_3_mt_check(const struct xt_mtchk_param *par) { - const struct ebt_802_3_info *info = data; + const struct ebt_802_3_info *info = par->matchinfo; - if (datalen < sizeof(struct ebt_802_3_info)) - return -EINVAL; if (info->bitmask & ~EBT_802_3_MASK || info->invflags & ~EBT_802_3_MASK) - return -EINVAL; + return false; - return 0; + return true; } -static struct ebt_match filter_802_3 __read_mostly = { - .name = EBT_802_3_MATCH, - .match = ebt_filter_802_3, - .check = ebt_802_3_check, +static struct xt_match ebt_802_3_mt_reg __read_mostly = { + .name = "802_3", + .revision = 0, + .family = NFPROTO_BRIDGE, + .match = ebt_802_3_mt, + .checkentry = ebt_802_3_mt_check, + .matchsize = XT_ALIGN(sizeof(struct ebt_802_3_info)), .me = THIS_MODULE, }; static int __init ebt_802_3_init(void) { - return ebt_register_match(&filter_802_3); + return xt_register_match(&ebt_802_3_mt_reg); } static void __exit ebt_802_3_fini(void) { - ebt_unregister_match(&filter_802_3); + xt_unregister_match(&ebt_802_3_mt_reg); } module_init(ebt_802_3_init); diff --git a/net/bridge/netfilter/ebt_among.c b/net/bridge/netfilter/ebt_among.c index 70b6dca5ea75..b595f091f35b 100644 --- a/net/bridge/netfilter/ebt_among.c +++ b/net/bridge/netfilter/ebt_among.c @@ -7,15 +7,15 @@ * August, 2003 * */ - -#include <linux/netfilter_bridge/ebtables.h> -#include <linux/netfilter_bridge/ebt_among.h> #include <linux/ip.h> #include <linux/if_arp.h> #include <linux/module.h> +#include <linux/netfilter/x_tables.h> +#include <linux/netfilter_bridge/ebtables.h> +#include <linux/netfilter_bridge/ebt_among.h> -static int ebt_mac_wormhash_contains(const struct ebt_mac_wormhash *wh, - const char *mac, __be32 ip) +static bool ebt_mac_wormhash_contains(const struct ebt_mac_wormhash *wh, + const char *mac, __be32 ip) { /* You may be puzzled as to how this code works. * Some tricks were used, refer to @@ -33,23 +33,19 @@ static int ebt_mac_wormhash_contains(const struct ebt_mac_wormhash *wh, if (ip) { for (i = start; i < limit; i++) { p = &wh->pool[i]; - if (cmp[1] == p->cmp[1] && cmp[0] == p->cmp[0]) { - if (p->ip == 0 || p->ip == ip) { - return 1; - } - } + if (cmp[1] == p->cmp[1] && cmp[0] == p->cmp[0]) + if (p->ip == 0 || p->ip == ip) + return true; } } else { for (i = start; i < limit; i++) { p = &wh->pool[i]; - if (cmp[1] == p->cmp[1] && cmp[0] == p->cmp[0]) { - if (p->ip == 0) { - return 1; - } - } + if (cmp[1] == p->cmp[1] && cmp[0] == p->cmp[0]) + if (p->ip == 0) + return true; } } - return 0; + return false; } static int ebt_mac_wormhash_check_integrity(const struct ebt_mac_wormhash @@ -131,12 +127,10 @@ static int get_ip_src(const struct sk_buff *skb, __be32 *addr) return 0; } -static int ebt_filter_among(const struct sk_buff *skb, - const struct net_device *in, - const struct net_device *out, const void *data, - unsigned int datalen) +static bool +ebt_among_mt(const struct sk_buff *skb, const struct xt_match_param *par) { - const struct ebt_among_info *info = data; + const struct ebt_among_info *info = par->matchinfo; const char *dmac, *smac; const struct ebt_mac_wormhash *wh_dst, *wh_src; __be32 dip = 0, sip = 0; @@ -147,41 +141,41 @@ static int ebt_filter_among(const struct sk_buff *skb, if (wh_src) { smac = eth_hdr(skb)->h_source; if (get_ip_src(skb, &sip)) - return EBT_NOMATCH; + return false; if (!(info->bitmask & EBT_AMONG_SRC_NEG)) { /* we match only if it contains */ if (!ebt_mac_wormhash_contains(wh_src, smac, sip)) - return EBT_NOMATCH; + return false; } else { /* we match only if it DOES NOT contain */ if (ebt_mac_wormhash_contains(wh_src, smac, sip)) - return EBT_NOMATCH; + return false; } } if (wh_dst) { dmac = eth_hdr(skb)->h_dest; if (get_ip_dst(skb, &dip)) - return EBT_NOMATCH; + return false; if (!(info->bitmask & EBT_AMONG_DST_NEG)) { /* we match only if it contains */ if (!ebt_mac_wormhash_contains(wh_dst, dmac, dip)) - return EBT_NOMATCH; + return false; } else { /* we match only if it DOES NOT contain */ if (ebt_mac_wormhash_contains(wh_dst, dmac, dip)) - return EBT_NOMATCH; + return false; } } - return EBT_MATCH; + return true; } -static int ebt_among_check(const char *tablename, unsigned int hookmask, - const struct ebt_entry *e, void *data, - unsigned int datalen) +static bool ebt_among_mt_check(const struct xt_mtchk_param *par) { - const struct ebt_among_info *info = data; + const struct ebt_among_info *info = par->matchinfo; + const struct ebt_entry_match *em = + container_of(par->matchinfo, const struct ebt_entry_match, data); int expected_length = sizeof(struct ebt_among_info); const struct ebt_mac_wormhash *wh_dst, *wh_src; int err; @@ -191,42 +185,45 @@ static int ebt_among_check(const char *tablename, unsigned int hookmask, expected_length += ebt_mac_wormhash_size(wh_dst); expected_length += ebt_mac_wormhash_size(wh_src); - if (datalen != EBT_ALIGN(expected_length)) { + if (em->match_size != EBT_ALIGN(expected_length)) { printk(KERN_WARNING "ebtables: among: wrong size: %d " "against expected %d, rounded to %Zd\n", - datalen, expected_length, + em->match_size, expected_length, EBT_ALIGN(expected_length)); - return -EINVAL; + return false; } if (wh_dst && (err = ebt_mac_wormhash_check_integrity(wh_dst))) { printk(KERN_WARNING "ebtables: among: dst integrity fail: %x\n", -err); - return -EINVAL; + return false; } if (wh_src && (err = ebt_mac_wormhash_check_integrity(wh_src))) { printk(KERN_WARNING "ebtables: among: src integrity fail: %x\n", -err); - return -EINVAL; + return false; } - return 0; + return true; } -static struct ebt_match filter_among __read_mostly = { - .name = EBT_AMONG_MATCH, - .match = ebt_filter_among, - .check = ebt_among_check, +static struct xt_match ebt_among_mt_reg __read_mostly = { + .name = "among", + .revision = 0, + .family = NFPROTO_BRIDGE, + .match = ebt_among_mt, + .checkentry = ebt_among_mt_check, + .matchsize = -1, /* special case */ .me = THIS_MODULE, }; static int __init ebt_among_init(void) { - return ebt_register_match(&filter_among); + return xt_register_match(&ebt_among_mt_reg); } static void __exit ebt_among_fini(void) { - ebt_unregister_match(&filter_among); + xt_unregister_match(&ebt_among_mt_reg); } module_init(ebt_among_init); diff --git a/net/bridge/netfilter/ebt_arp.c b/net/bridge/netfilter/ebt_arp.c index 7c535be75665..b7ad60419f9a 100644 --- a/net/bridge/netfilter/ebt_arp.c +++ b/net/bridge/netfilter/ebt_arp.c @@ -8,58 +8,58 @@ * April, 2002 * */ - -#include <linux/netfilter_bridge/ebtables.h> -#include <linux/netfilter_bridge/ebt_arp.h> #include <linux/if_arp.h> #include <linux/if_ether.h> #include <linux/module.h> +#include <linux/netfilter/x_tables.h> +#include <linux/netfilter_bridge/ebtables.h> +#include <linux/netfilter_bridge/ebt_arp.h> -static int ebt_filter_arp(const struct sk_buff *skb, const struct net_device *in, - const struct net_device *out, const void *data, unsigned int datalen) +static bool +ebt_arp_mt(const struct sk_buff *skb, const struct xt_match_param *par) { - const struct ebt_arp_info *info = data; + const struct ebt_arp_info *info = par->matchinfo; const struct arphdr *ah; struct arphdr _arph; ah = skb_header_pointer(skb, 0, sizeof(_arph), &_arph); if (ah == NULL) - return EBT_NOMATCH; + return false; if (info->bitmask & EBT_ARP_OPCODE && FWINV(info->opcode != ah->ar_op, EBT_ARP_OPCODE)) - return EBT_NOMATCH; + return false; if (info->bitmask & EBT_ARP_HTYPE && FWINV(info->htype != ah->ar_hrd, EBT_ARP_HTYPE)) - return EBT_NOMATCH; + return false; if (info->bitmask & EBT_ARP_PTYPE && FWINV(info->ptype != ah->ar_pro, EBT_ARP_PTYPE)) - return EBT_NOMATCH; + return false; if (info->bitmask & (EBT_ARP_SRC_IP | EBT_ARP_DST_IP | EBT_ARP_GRAT)) { const __be32 *sap, *dap; __be32 saddr, daddr; if (ah->ar_pln != sizeof(__be32) || ah->ar_pro != htons(ETH_P_IP)) - return EBT_NOMATCH; + return false; sap = skb_header_pointer(skb, sizeof(struct arphdr) + ah->ar_hln, sizeof(saddr), &saddr); if (sap == NULL) - return EBT_NOMATCH; + return false; dap = skb_header_pointer(skb, sizeof(struct arphdr) + 2*ah->ar_hln+sizeof(saddr), sizeof(daddr), &daddr); if (dap == NULL) - return EBT_NOMATCH; + return false; if (info->bitmask & EBT_ARP_SRC_IP && FWINV(info->saddr != (*sap & info->smsk), EBT_ARP_SRC_IP)) - return EBT_NOMATCH; + return false; if (info->bitmask & EBT_ARP_DST_IP && FWINV(info->daddr != (*dap & info->dmsk), EBT_ARP_DST_IP)) - return EBT_NOMATCH; + return false; if (info->bitmask & EBT_ARP_GRAT && FWINV(*dap != *sap, EBT_ARP_GRAT)) - return EBT_NOMATCH; + return false; } if (info->bitmask & (EBT_ARP_SRC_MAC | EBT_ARP_DST_MAC)) { @@ -68,18 +68,18 @@ static int ebt_filter_arp(const struct sk_buff *skb, const struct net_device *in uint8_t verdict, i; if (ah->ar_hln != ETH_ALEN || ah->ar_hrd != htons(ARPHRD_ETHER)) - return EBT_NOMATCH; + return false; if (info->bitmask & EBT_ARP_SRC_MAC) { mp = skb_header_pointer(skb, sizeof(struct arphdr), sizeof(_mac), &_mac); if (mp == NULL) - return EBT_NOMATCH; + return false; verdict = 0; for (i = 0; i < 6; i++) verdict |= (mp[i] ^ info->smaddr[i]) & info->smmsk[i]; if (FWINV(verdict != 0, EBT_ARP_SRC_MAC)) - return EBT_NOMATCH; + return false; } if (info->bitmask & EBT_ARP_DST_MAC) { @@ -87,50 +87,51 @@ static int ebt_filter_arp(const struct sk_buff *skb, const struct net_device *in ah->ar_hln + ah->ar_pln, sizeof(_mac), &_mac); if (mp == NULL) - return EBT_NOMATCH; + return false; verdict = 0; for (i = 0; i < 6; i++) verdict |= (mp[i] ^ info->dmaddr[i]) & info->dmmsk[i]; if (FWINV(verdict != 0, EBT_ARP_DST_MAC)) - return EBT_NOMATCH; + return false; } } - return EBT_MATCH; + return true; } -static int ebt_arp_check(const char *tablename, unsigned int hookmask, - const struct ebt_entry *e, void *data, unsigned int datalen) +static bool ebt_arp_mt_check(const struct xt_mtchk_param *par) { - const struct ebt_arp_info *info = data; + const struct ebt_arp_info *info = par->matchinfo; + const struct ebt_entry *e = par->entryinfo; - if (datalen != EBT_ALIGN(sizeof(struct ebt_arp_info))) - return -EINVAL; if ((e->ethproto != htons(ETH_P_ARP) && e->ethproto != htons(ETH_P_RARP)) || e->invflags & EBT_IPROTO) - return -EINVAL; + return false; if (info->bitmask & ~EBT_ARP_MASK || info->invflags & ~EBT_ARP_MASK) - return -EINVAL; - return 0; + return false; + return true; } -static struct ebt_match filter_arp __read_mostly = { - .name = EBT_ARP_MATCH, - .match = ebt_filter_arp, - .check = ebt_arp_check, +static struct xt_match ebt_arp_mt_reg __read_mostly = { + .name = "arp", + .revision = 0, + .family = NFPROTO_BRIDGE, + .match = ebt_arp_mt, + .checkentry = ebt_arp_mt_check, + .matchsize = XT_ALIGN(sizeof(struct ebt_arp_info)), .me = THIS_MODULE, }; static int __init ebt_arp_init(void) { - return ebt_register_match(&filter_arp); + return xt_register_match(&ebt_arp_mt_reg); } static void __exit ebt_arp_fini(void) { - ebt_unregister_match(&filter_arp); + xt_unregister_match(&ebt_arp_mt_reg); } module_init(ebt_arp_init); diff --git a/net/bridge/netfilter/ebt_arpreply.c b/net/bridge/netfilter/ebt_arpreply.c index 0c4279590fc7..76584cd72e57 100644 --- a/net/bridge/netfilter/ebt_arpreply.c +++ b/net/bridge/netfilter/ebt_arpreply.c @@ -8,18 +8,17 @@ * August, 2003 * */ - -#include <linux/netfilter_bridge/ebtables.h> -#include <linux/netfilter_bridge/ebt_arpreply.h> #include <linux/if_arp.h> #include <net/arp.h> #include <linux/module.h> +#include <linux/netfilter/x_tables.h> +#include <linux/netfilter_bridge/ebtables.h> +#include <linux/netfilter_bridge/ebt_arpreply.h> -static int ebt_target_reply(struct sk_buff *skb, unsigned int hooknr, - const struct net_device *in, const struct net_device *out, - const void *data, unsigned int datalen) +static unsigned int +ebt_arpreply_tg(struct sk_buff *skb, const struct xt_target_param *par) { - struct ebt_arpreply_info *info = (void *)data; + const struct ebt_arpreply_info *info = par->targinfo; const __be32 *siptr, *diptr; __be32 _sip, _dip; const struct arphdr *ap; @@ -52,45 +51,45 @@ static int ebt_target_reply(struct sk_buff *skb, unsigned int hooknr, if (diptr == NULL) return EBT_DROP; - arp_send(ARPOP_REPLY, ETH_P_ARP, *siptr, (struct net_device *)in, + arp_send(ARPOP_REPLY, ETH_P_ARP, *siptr, (struct net_device *)par->in, *diptr, shp, info->mac, shp); return info->target; } -static int ebt_target_reply_check(const char *tablename, unsigned int hookmask, - const struct ebt_entry *e, void *data, unsigned int datalen) +static bool ebt_arpreply_tg_check(const struct xt_tgchk_param *par) { - const struct ebt_arpreply_info *info = data; + const struct ebt_arpreply_info *info = par->targinfo; + const struct ebt_entry *e = par->entryinfo; - if (datalen != EBT_ALIGN(sizeof(struct ebt_arpreply_info))) - return -EINVAL; if (BASE_CHAIN && info->target == EBT_RETURN) - return -EINVAL; + return false; if (e->ethproto != htons(ETH_P_ARP) || e->invflags & EBT_IPROTO) - return -EINVAL; - CLEAR_BASE_CHAIN_BIT; - if (strcmp(tablename, "nat") || hookmask & ~(1 << NF_BR_PRE_ROUTING)) - return -EINVAL; - return 0; + return false; + return true; } -static struct ebt_target reply_target __read_mostly = { - .name = EBT_ARPREPLY_TARGET, - .target = ebt_target_reply, - .check = ebt_target_reply_check, +static struct xt_target ebt_arpreply_tg_reg __read_mostly = { + .name = "arpreply", + .revision = 0, + .family = NFPROTO_BRIDGE, + .table = "nat", + .hooks = (1 << NF_BR_NUMHOOKS) | (1 << NF_BR_PRE_ROUTING), + .target = ebt_arpreply_tg, + .checkentry = ebt_arpreply_tg_check, + .targetsize = XT_ALIGN(sizeof(struct ebt_arpreply_info)), .me = THIS_MODULE, }; static int __init ebt_arpreply_init(void) { - return ebt_register_target(&reply_target); + return xt_register_target(&ebt_arpreply_tg_reg); } static void __exit ebt_arpreply_fini(void) { - ebt_unregister_target(&reply_target); + xt_unregister_target(&ebt_arpreply_tg_reg); } module_init(ebt_arpreply_init); diff --git a/net/bridge/netfilter/ebt_dnat.c b/net/bridge/netfilter/ebt_dnat.c index ca64c1cc1b47..6b49ea9e31fb 100644 --- a/net/bridge/netfilter/ebt_dnat.c +++ b/net/bridge/netfilter/ebt_dnat.c @@ -7,18 +7,17 @@ * June, 2002 * */ - +#include <linux/module.h> +#include <net/sock.h> #include <linux/netfilter.h> +#include <linux/netfilter/x_tables.h> #include <linux/netfilter_bridge/ebtables.h> #include <linux/netfilter_bridge/ebt_nat.h> -#include <linux/module.h> -#include <net/sock.h> -static int ebt_target_dnat(struct sk_buff *skb, unsigned int hooknr, - const struct net_device *in, const struct net_device *out, - const void *data, unsigned int datalen) +static unsigned int +ebt_dnat_tg(struct sk_buff *skb, const struct xt_target_param *par) { - const struct ebt_nat_info *info = data; + const struct ebt_nat_info *info = par->targinfo; if (!skb_make_writable(skb, 0)) return EBT_DROP; @@ -27,40 +26,46 @@ static int ebt_target_dnat(struct sk_buff *skb, unsigned int hooknr, return info->target; } -static int ebt_target_dnat_check(const char *tablename, unsigned int hookmask, - const struct ebt_entry *e, void *data, unsigned int datalen) +static bool ebt_dnat_tg_check(const struct xt_tgchk_param *par) { - const struct ebt_nat_info *info = data; + const struct ebt_nat_info *info = par->targinfo; + unsigned int hook_mask; if (BASE_CHAIN && info->target == EBT_RETURN) - return -EINVAL; - CLEAR_BASE_CHAIN_BIT; - if ( (strcmp(tablename, "nat") || - (hookmask & ~((1 << NF_BR_PRE_ROUTING) | (1 << NF_BR_LOCAL_OUT)))) && - (strcmp(tablename, "broute") || hookmask & ~(1 << NF_BR_BROUTING)) ) - return -EINVAL; - if (datalen != EBT_ALIGN(sizeof(struct ebt_nat_info))) - return -EINVAL; + return false; + + hook_mask = par->hook_mask & ~(1 << NF_BR_NUMHOOKS); + if ((strcmp(par->table, "nat") != 0 || + (hook_mask & ~((1 << NF_BR_PRE_ROUTING) | + (1 << NF_BR_LOCAL_OUT)))) && + (strcmp(par->table, "broute") != 0 || + hook_mask & ~(1 << NF_BR_BROUTING))) + return false; if (INVALID_TARGET) - return -EINVAL; - return 0; + return false; + return true; } -static struct ebt_target dnat __read_mostly = { - .name = EBT_DNAT_TARGET, - .target = ebt_target_dnat, - .check = ebt_target_dnat_check, +static struct xt_target ebt_dnat_tg_reg __read_mostly = { + .name = "dnat", + .revision = 0, + .family = NFPROTO_BRIDGE, + .hooks = (1 << NF_BR_NUMHOOKS) | (1 << NF_BR_PRE_ROUTING) | + (1 << NF_BR_LOCAL_OUT) | (1 << NF_BR_BROUTING), + .target = ebt_dnat_tg, + .checkentry = ebt_dnat_tg_check, + .targetsize = XT_ALIGN(sizeof(struct ebt_nat_info)), .me = THIS_MODULE, }; static int __init ebt_dnat_init(void) { - return ebt_register_target(&dnat); + return xt_register_target(&ebt_dnat_tg_reg); } static void __exit ebt_dnat_fini(void) { - ebt_unregister_target(&dnat); + xt_unregister_target(&ebt_dnat_tg_reg); } module_init(ebt_dnat_init); diff --git a/net/bridge/netfilter/ebt_ip.c b/net/bridge/netfilter/ebt_ip.c index 65caa00dcf2a..d771bbfbcbe6 100644 --- a/net/bridge/netfilter/ebt_ip.c +++ b/net/bridge/netfilter/ebt_ip.c @@ -11,24 +11,23 @@ * Innominate Security Technologies AG <mhopf@innominate.com> * September, 2002 */ - -#include <linux/netfilter_bridge/ebtables.h> -#include <linux/netfilter_bridge/ebt_ip.h> #include <linux/ip.h> #include <net/ip.h> #include <linux/in.h> #include <linux/module.h> +#include <linux/netfilter/x_tables.h> +#include <linux/netfilter_bridge/ebtables.h> +#include <linux/netfilter_bridge/ebt_ip.h> struct tcpudphdr { __be16 src; __be16 dst; }; -static int ebt_filter_ip(const struct sk_buff *skb, const struct net_device *in, - const struct net_device *out, const void *data, - unsigned int datalen) +static bool +ebt_ip_mt(const struct sk_buff *skb, const struct xt_match_param *par) { - const struct ebt_ip_info *info = data; + const struct ebt_ip_info *info = par->matchinfo; const struct iphdr *ih; struct iphdr _iph; const struct tcpudphdr *pptr; @@ -36,92 +35,93 @@ static int ebt_filter_ip(const struct sk_buff *skb, const struct net_device *in, ih = skb_header_pointer(skb, 0, sizeof(_iph), &_iph); if (ih == NULL) - return EBT_NOMATCH; + return false; if (info->bitmask & EBT_IP_TOS && FWINV(info->tos != ih->tos, EBT_IP_TOS)) - return EBT_NOMATCH; + return false; if (info->bitmask & EBT_IP_SOURCE && FWINV((ih->saddr & info->smsk) != info->saddr, EBT_IP_SOURCE)) - return EBT_NOMATCH; + return false; if ((info->bitmask & EBT_IP_DEST) && FWINV((ih->daddr & info->dmsk) != info->daddr, EBT_IP_DEST)) - return EBT_NOMATCH; + return false; if (info->bitmask & EBT_IP_PROTO) { if (FWINV(info->protocol != ih->protocol, EBT_IP_PROTO)) - return EBT_NOMATCH; + return false; if (!(info->bitmask & EBT_IP_DPORT) && !(info->bitmask & EBT_IP_SPORT)) - return EBT_MATCH; + return true; if (ntohs(ih->frag_off) & IP_OFFSET) - return EBT_NOMATCH; + return false; pptr = skb_header_pointer(skb, ih->ihl*4, sizeof(_ports), &_ports); if (pptr == NULL) - return EBT_NOMATCH; + return false; if (info->bitmask & EBT_IP_DPORT) { u32 dst = ntohs(pptr->dst); if (FWINV(dst < info->dport[0] || dst > info->dport[1], EBT_IP_DPORT)) - return EBT_NOMATCH; + return false; } if (info->bitmask & EBT_IP_SPORT) { u32 src = ntohs(pptr->src); if (FWINV(src < info->sport[0] || src > info->sport[1], EBT_IP_SPORT)) - return EBT_NOMATCH; + return false; } } - return EBT_MATCH; + return true; } -static int ebt_ip_check(const char *tablename, unsigned int hookmask, - const struct ebt_entry *e, void *data, unsigned int datalen) +static bool ebt_ip_mt_check(const struct xt_mtchk_param *par) { - const struct ebt_ip_info *info = data; + const struct ebt_ip_info *info = par->matchinfo; + const struct ebt_entry *e = par->entryinfo; - if (datalen != EBT_ALIGN(sizeof(struct ebt_ip_info))) - return -EINVAL; if (e->ethproto != htons(ETH_P_IP) || e->invflags & EBT_IPROTO) - return -EINVAL; + return false; if (info->bitmask & ~EBT_IP_MASK || info->invflags & ~EBT_IP_MASK) - return -EINVAL; + return false; if (info->bitmask & (EBT_IP_DPORT | EBT_IP_SPORT)) { if (info->invflags & EBT_IP_PROTO) - return -EINVAL; + return false; if (info->protocol != IPPROTO_TCP && info->protocol != IPPROTO_UDP && info->protocol != IPPROTO_UDPLITE && info->protocol != IPPROTO_SCTP && info->protocol != IPPROTO_DCCP) - return -EINVAL; + return false; } if (info->bitmask & EBT_IP_DPORT && info->dport[0] > info->dport[1]) - return -EINVAL; + return false; if (info->bitmask & EBT_IP_SPORT && info->sport[0] > info->sport[1]) - return -EINVAL; - return 0; + return false; + return true; } -static struct ebt_match filter_ip __read_mostly = { - .name = EBT_IP_MATCH, - .match = ebt_filter_ip, - .check = ebt_ip_check, +static struct xt_match ebt_ip_mt_reg __read_mostly = { + .name = "ip", + .revision = 0, + .family = NFPROTO_BRIDGE, + .match = ebt_ip_mt, + .checkentry = ebt_ip_mt_check, + .matchsize = XT_ALIGN(sizeof(struct ebt_ip_info)), .me = THIS_MODULE, }; static int __init ebt_ip_init(void) { - return ebt_register_match(&filter_ip); + return xt_register_match(&ebt_ip_mt_reg); } static void __exit ebt_ip_fini(void) { - ebt_unregister_match(&filter_ip); + xt_unregister_match(&ebt_ip_mt_reg); } module_init(ebt_ip_init); diff --git a/net/bridge/netfilter/ebt_ip6.c b/net/bridge/netfilter/ebt_ip6.c index 36efb3a75249..784a6573876c 100644 --- a/net/bridge/netfilter/ebt_ip6.c +++ b/net/bridge/netfilter/ebt_ip6.c @@ -13,26 +13,24 @@ * * Jan, 2008 */ - -#include <linux/netfilter_bridge/ebtables.h> -#include <linux/netfilter_bridge/ebt_ip6.h> #include <linux/ipv6.h> #include <net/ipv6.h> #include <linux/in.h> #include <linux/module.h> #include <net/dsfield.h> +#include <linux/netfilter/x_tables.h> +#include <linux/netfilter_bridge/ebtables.h> +#include <linux/netfilter_bridge/ebt_ip6.h> struct tcpudphdr { __be16 src; __be16 dst; }; -static int ebt_filter_ip6(const struct sk_buff *skb, - const struct net_device *in, - const struct net_device *out, const void *data, - unsigned int datalen) +static bool +ebt_ip6_mt(const struct sk_buff *skb, const struct xt_match_param *par) { - const struct ebt_ip6_info *info = (struct ebt_ip6_info *)data; + const struct ebt_ip6_info *info = par->matchinfo; const struct ipv6hdr *ih6; struct ipv6hdr _ip6h; const struct tcpudphdr *pptr; @@ -42,100 +40,100 @@ static int ebt_filter_ip6(const struct sk_buff *skb, ih6 = skb_header_pointer(skb, 0, sizeof(_ip6h), &_ip6h); if (ih6 == NULL) - return EBT_NOMATCH; + return false; if (info->bitmask & EBT_IP6_TCLASS && FWINV(info->tclass != ipv6_get_dsfield(ih6), EBT_IP6_TCLASS)) - return EBT_NOMATCH; + return false; for (i = 0; i < 4; i++) tmp_addr.in6_u.u6_addr32[i] = ih6->saddr.in6_u.u6_addr32[i] & info->smsk.in6_u.u6_addr32[i]; if (info->bitmask & EBT_IP6_SOURCE && FWINV((ipv6_addr_cmp(&tmp_addr, &info->saddr) != 0), EBT_IP6_SOURCE)) - return EBT_NOMATCH; + return false; for (i = 0; i < 4; i++) tmp_addr.in6_u.u6_addr32[i] = ih6->daddr.in6_u.u6_addr32[i] & info->dmsk.in6_u.u6_addr32[i]; if (info->bitmask & EBT_IP6_DEST && FWINV((ipv6_addr_cmp(&tmp_addr, &info->daddr) != 0), EBT_IP6_DEST)) - return EBT_NOMATCH; + return false; if (info->bitmask & EBT_IP6_PROTO) { uint8_t nexthdr = ih6->nexthdr; int offset_ph; offset_ph = ipv6_skip_exthdr(skb, sizeof(_ip6h), &nexthdr); if (offset_ph == -1) - return EBT_NOMATCH; + return false; if (FWINV(info->protocol != nexthdr, EBT_IP6_PROTO)) - return EBT_NOMATCH; + return false; if (!(info->bitmask & EBT_IP6_DPORT) && !(info->bitmask & EBT_IP6_SPORT)) - return EBT_MATCH; + return true; pptr = skb_header_pointer(skb, offset_ph, sizeof(_ports), &_ports); if (pptr == NULL) - return EBT_NOMATCH; + return false; if (info->bitmask & EBT_IP6_DPORT) { u32 dst = ntohs(pptr->dst); if (FWINV(dst < info->dport[0] || dst > info->dport[1], EBT_IP6_DPORT)) - return EBT_NOMATCH; + return false; } if (info->bitmask & EBT_IP6_SPORT) { u32 src = ntohs(pptr->src); if (FWINV(src < info->sport[0] || src > info->sport[1], EBT_IP6_SPORT)) - return EBT_NOMATCH; + return false; } - return EBT_MATCH; + return true; } - return EBT_MATCH; + return true; } -static int ebt_ip6_check(const char *tablename, unsigned int hookmask, - const struct ebt_entry *e, void *data, unsigned int datalen) +static bool ebt_ip6_mt_check(const struct xt_mtchk_param *par) { - struct ebt_ip6_info *info = (struct ebt_ip6_info *)data; + const struct ebt_entry *e = par->entryinfo; + struct ebt_ip6_info *info = par->matchinfo; - if (datalen != EBT_ALIGN(sizeof(struct ebt_ip6_info))) - return -EINVAL; if (e->ethproto != htons(ETH_P_IPV6) || e->invflags & EBT_IPROTO) - return -EINVAL; + return false; if (info->bitmask & ~EBT_IP6_MASK || info->invflags & ~EBT_IP6_MASK) - return -EINVAL; + return false; if (info->bitmask & (EBT_IP6_DPORT | EBT_IP6_SPORT)) { if (info->invflags & EBT_IP6_PROTO) - return -EINVAL; + return false; if (info->protocol != IPPROTO_TCP && info->protocol != IPPROTO_UDP && info->protocol != IPPROTO_UDPLITE && info->protocol != IPPROTO_SCTP && info->protocol != IPPROTO_DCCP) - return -EINVAL; + return false; } if (info->bitmask & EBT_IP6_DPORT && info->dport[0] > info->dport[1]) - return -EINVAL; + return false; if (info->bitmask & EBT_IP6_SPORT && info->sport[0] > info->sport[1]) - return -EINVAL; - return 0; + return false; + return true; } -static struct ebt_match filter_ip6 = -{ - .name = EBT_IP6_MATCH, - .match = ebt_filter_ip6, - .check = ebt_ip6_check, +static struct xt_match ebt_ip6_mt_reg __read_mostly = { + .name = "ip6", + .revision = 0, + .family = NFPROTO_BRIDGE, + .match = ebt_ip6_mt, + .checkentry = ebt_ip6_mt_check, + .matchsize = XT_ALIGN(sizeof(struct ebt_ip6_info)), .me = THIS_MODULE, }; static int __init ebt_ip6_init(void) { - return ebt_register_match(&filter_ip6); + return xt_register_match(&ebt_ip6_mt_reg); } static void __exit ebt_ip6_fini(void) { - ebt_unregister_match(&filter_ip6); + xt_unregister_match(&ebt_ip6_mt_reg); } module_init(ebt_ip6_init); diff --git a/net/bridge/netfilter/ebt_limit.c b/net/bridge/netfilter/ebt_limit.c index 8cbdc01c253e..f7bd9192ff0c 100644 --- a/net/bridge/netfilter/ebt_limit.c +++ b/net/bridge/netfilter/ebt_limit.c @@ -10,13 +10,12 @@ * September, 2003 * */ - -#include <linux/netfilter_bridge/ebtables.h> -#include <linux/netfilter_bridge/ebt_limit.h> #include <linux/module.h> - #include <linux/netdevice.h> #include <linux/spinlock.h> +#include <linux/netfilter/x_tables.h> +#include <linux/netfilter_bridge/ebtables.h> +#include <linux/netfilter_bridge/ebt_limit.h> static DEFINE_SPINLOCK(limit_lock); @@ -31,11 +30,10 @@ static DEFINE_SPINLOCK(limit_lock); #define CREDITS_PER_JIFFY POW2_BELOW32(MAX_CPJ) -static int ebt_limit_match(const struct sk_buff *skb, - const struct net_device *in, const struct net_device *out, - const void *data, unsigned int datalen) +static bool +ebt_limit_mt(const struct sk_buff *skb, const struct xt_match_param *par) { - struct ebt_limit_info *info = (struct ebt_limit_info *)data; + struct ebt_limit_info *info = (void *)par->matchinfo; unsigned long now = jiffies; spin_lock_bh(&limit_lock); @@ -47,11 +45,11 @@ static int ebt_limit_match(const struct sk_buff *skb, /* We're not limited. */ info->credit -= info->cost; spin_unlock_bh(&limit_lock); - return EBT_MATCH; + return true; } spin_unlock_bh(&limit_lock); - return EBT_NOMATCH; + return false; } /* Precision saver. */ @@ -66,20 +64,16 @@ user2credits(u_int32_t user) return (user * HZ * CREDITS_PER_JIFFY) / EBT_LIMIT_SCALE; } -static int ebt_limit_check(const char *tablename, unsigned int hookmask, - const struct ebt_entry *e, void *data, unsigned int datalen) +static bool ebt_limit_mt_check(const struct xt_mtchk_param *par) { - struct ebt_limit_info *info = data; - - if (datalen != EBT_ALIGN(sizeof(struct ebt_limit_info))) - return -EINVAL; + struct ebt_limit_info *info = par->matchinfo; /* Check for overflow. */ if (info->burst == 0 || user2credits(info->avg * info->burst) < user2credits(info->avg)) { printk("Overflow in ebt_limit, try lower: %u/%u\n", info->avg, info->burst); - return -EINVAL; + return false; } /* User avg in seconds * EBT_LIMIT_SCALE: convert to jiffies * 128. */ @@ -87,24 +81,27 @@ static int ebt_limit_check(const char *tablename, unsigned int hookmask, info->credit = user2credits(info->avg * info->burst); info->credit_cap = user2credits(info->avg * info->burst); info->cost = user2credits(info->avg); - return 0; + return true; } -static struct ebt_match ebt_limit_reg __read_mostly = { - .name = EBT_LIMIT_MATCH, - .match = ebt_limit_match, - .check = ebt_limit_check, +static struct xt_match ebt_limit_mt_reg __read_mostly = { + .name = "limit", + .revision = 0, + .family = NFPROTO_BRIDGE, + .match = ebt_limit_mt, + .checkentry = ebt_limit_mt_check, + .matchsize = XT_ALIGN(sizeof(struct ebt_limit_info)), .me = THIS_MODULE, }; static int __init ebt_limit_init(void) { - return ebt_register_match(&ebt_limit_reg); + return xt_register_match(&ebt_limit_mt_reg); } static void __exit ebt_limit_fini(void) { - ebt_unregister_match(&ebt_limit_reg); + xt_unregister_match(&ebt_limit_mt_reg); } module_init(ebt_limit_init); diff --git a/net/bridge/netfilter/ebt_log.c b/net/bridge/netfilter/ebt_log.c index 2f430d4ae911..3d33c608906a 100644 --- a/net/bridge/netfilter/ebt_log.c +++ b/net/bridge/netfilter/ebt_log.c @@ -8,10 +8,6 @@ * April, 2002 * */ - -#include <linux/netfilter_bridge/ebtables.h> -#include <linux/netfilter_bridge/ebt_log.h> -#include <linux/netfilter.h> #include <linux/module.h> #include <linux/ip.h> #include <linux/in.h> @@ -21,22 +17,23 @@ #include <linux/ipv6.h> #include <net/ipv6.h> #include <linux/in6.h> +#include <linux/netfilter/x_tables.h> +#include <linux/netfilter_bridge/ebtables.h> +#include <linux/netfilter_bridge/ebt_log.h> +#include <linux/netfilter.h> static DEFINE_SPINLOCK(ebt_log_lock); -static int ebt_log_check(const char *tablename, unsigned int hookmask, - const struct ebt_entry *e, void *data, unsigned int datalen) +static bool ebt_log_tg_check(const struct xt_tgchk_param *par) { - struct ebt_log_info *info = data; + struct ebt_log_info *info = par->targinfo; - if (datalen != EBT_ALIGN(sizeof(struct ebt_log_info))) - return -EINVAL; if (info->bitmask & ~EBT_LOG_MASK) - return -EINVAL; + return false; if (info->loglevel >= 8) - return -EINVAL; + return false; info->prefix[EBT_LOG_PREFIX_SIZE - 1] = '\0'; - return 0; + return true; } struct tcpudphdr @@ -84,7 +81,7 @@ print_ports(const struct sk_buff *skb, uint8_t protocol, int offset) #define myNIPQUAD(a) a[0], a[1], a[2], a[3] static void -ebt_log_packet(unsigned int pf, unsigned int hooknum, +ebt_log_packet(u_int8_t pf, unsigned int hooknum, const struct sk_buff *skb, const struct net_device *in, const struct net_device *out, const struct nf_loginfo *loginfo, const char *prefix) @@ -194,11 +191,10 @@ out: } -static void ebt_log(const struct sk_buff *skb, unsigned int hooknr, - const struct net_device *in, const struct net_device *out, - const void *data, unsigned int datalen) +static unsigned int +ebt_log_tg(struct sk_buff *skb, const struct xt_target_param *par) { - const struct ebt_log_info *info = data; + const struct ebt_log_info *info = par->targinfo; struct nf_loginfo li; li.type = NF_LOG_TYPE_LOG; @@ -206,18 +202,21 @@ static void ebt_log(const struct sk_buff *skb, unsigned int hooknr, li.u.log.logflags = info->bitmask; if (info->bitmask & EBT_LOG_NFLOG) - nf_log_packet(PF_BRIDGE, hooknr, skb, in, out, &li, - "%s", info->prefix); + nf_log_packet(NFPROTO_BRIDGE, par->hooknum, skb, par->in, + par->out, &li, "%s", info->prefix); else - ebt_log_packet(PF_BRIDGE, hooknr, skb, in, out, &li, - info->prefix); + ebt_log_packet(NFPROTO_BRIDGE, par->hooknum, skb, par->in, + par->out, &li, info->prefix); + return EBT_CONTINUE; } -static struct ebt_watcher log = -{ - .name = EBT_LOG_WATCHER, - .watcher = ebt_log, - .check = ebt_log_check, +static struct xt_target ebt_log_tg_reg __read_mostly = { + .name = "log", + .revision = 0, + .family = NFPROTO_BRIDGE, + .target = ebt_log_tg, + .checkentry = ebt_log_tg_check, + .targetsize = XT_ALIGN(sizeof(struct ebt_log_info)), .me = THIS_MODULE, }; @@ -231,17 +230,17 @@ static int __init ebt_log_init(void) { int ret; - ret = ebt_register_watcher(&log); + ret = xt_register_target(&ebt_log_tg_reg); if (ret < 0) return ret; - nf_log_register(PF_BRIDGE, &ebt_log_logger); + nf_log_register(NFPROTO_BRIDGE, &ebt_log_logger); return 0; } static void __exit ebt_log_fini(void) { nf_log_unregister(&ebt_log_logger); - ebt_unregister_watcher(&log); + xt_unregister_target(&ebt_log_tg_reg); } module_init(ebt_log_init); diff --git a/net/bridge/netfilter/ebt_mark.c b/net/bridge/netfilter/ebt_mark.c index 36723f47db0a..2fee7e8e2e93 100644 --- a/net/bridge/netfilter/ebt_mark.c +++ b/net/bridge/netfilter/ebt_mark.c @@ -13,15 +13,15 @@ * Marking a frame doesn't really change anything in the frame anyway. */ +#include <linux/module.h> +#include <linux/netfilter/x_tables.h> #include <linux/netfilter_bridge/ebtables.h> #include <linux/netfilter_bridge/ebt_mark_t.h> -#include <linux/module.h> -static int ebt_target_mark(struct sk_buff *skb, unsigned int hooknr, - const struct net_device *in, const struct net_device *out, - const void *data, unsigned int datalen) +static unsigned int +ebt_mark_tg(struct sk_buff *skb, const struct xt_target_param *par) { - const struct ebt_mark_t_info *info = data; + const struct ebt_mark_t_info *info = par->targinfo; int action = info->target & -16; if (action == MARK_SET_VALUE) @@ -36,42 +36,41 @@ static int ebt_target_mark(struct sk_buff *skb, unsigned int hooknr, return info->target | ~EBT_VERDICT_BITS; } -static int ebt_target_mark_check(const char *tablename, unsigned int hookmask, - const struct ebt_entry *e, void *data, unsigned int datalen) +static bool ebt_mark_tg_check(const struct xt_tgchk_param *par) { - const struct ebt_mark_t_info *info = data; + const struct ebt_mark_t_info *info = par->targinfo; int tmp; - if (datalen != EBT_ALIGN(sizeof(struct ebt_mark_t_info))) - return -EINVAL; tmp = info->target | ~EBT_VERDICT_BITS; if (BASE_CHAIN && tmp == EBT_RETURN) - return -EINVAL; - CLEAR_BASE_CHAIN_BIT; + return false; if (tmp < -NUM_STANDARD_TARGETS || tmp >= 0) - return -EINVAL; + return false; tmp = info->target & ~EBT_VERDICT_BITS; if (tmp != MARK_SET_VALUE && tmp != MARK_OR_VALUE && tmp != MARK_AND_VALUE && tmp != MARK_XOR_VALUE) - return -EINVAL; - return 0; + return false; + return true; } -static struct ebt_target mark_target __read_mostly = { - .name = EBT_MARK_TARGET, - .target = ebt_target_mark, - .check = ebt_target_mark_check, +static struct xt_target ebt_mark_tg_reg __read_mostly = { + .name = "mark", + .revision = 0, + .family = NFPROTO_BRIDGE, + .target = ebt_mark_tg, + .checkentry = ebt_mark_tg_check, + .targetsize = XT_ALIGN(sizeof(struct ebt_mark_t_info)), .me = THIS_MODULE, }; static int __init ebt_mark_init(void) { - return ebt_register_target(&mark_target); + return xt_register_target(&ebt_mark_tg_reg); } static void __exit ebt_mark_fini(void) { - ebt_unregister_target(&mark_target); + xt_unregister_target(&ebt_mark_tg_reg); } module_init(ebt_mark_init); diff --git a/net/bridge/netfilter/ebt_mark_m.c b/net/bridge/netfilter/ebt_mark_m.c index 9b0a4543861f..ea570f214b1d 100644 --- a/net/bridge/netfilter/ebt_mark_m.c +++ b/net/bridge/netfilter/ebt_mark_m.c @@ -7,53 +7,52 @@ * July, 2002 * */ - +#include <linux/module.h> +#include <linux/netfilter/x_tables.h> #include <linux/netfilter_bridge/ebtables.h> #include <linux/netfilter_bridge/ebt_mark_m.h> -#include <linux/module.h> -static int ebt_filter_mark(const struct sk_buff *skb, - const struct net_device *in, const struct net_device *out, const void *data, - unsigned int datalen) +static bool +ebt_mark_mt(const struct sk_buff *skb, const struct xt_match_param *par) { - const struct ebt_mark_m_info *info = data; + const struct ebt_mark_m_info *info = par->matchinfo; if (info->bitmask & EBT_MARK_OR) - return !(!!(skb->mark & info->mask) ^ info->invert); - return !(((skb->mark & info->mask) == info->mark) ^ info->invert); + return !!(skb->mark & info->mask) ^ info->invert; + return ((skb->mark & info->mask) == info->mark) ^ info->invert; } -static int ebt_mark_check(const char *tablename, unsigned int hookmask, - const struct ebt_entry *e, void *data, unsigned int datalen) +static bool ebt_mark_mt_check(const struct xt_mtchk_param *par) { - const struct ebt_mark_m_info *info = data; + const struct ebt_mark_m_info *info = par->matchinfo; - if (datalen != EBT_ALIGN(sizeof(struct ebt_mark_m_info))) - return -EINVAL; if (info->bitmask & ~EBT_MARK_MASK) - return -EINVAL; + return false; if ((info->bitmask & EBT_MARK_OR) && (info->bitmask & EBT_MARK_AND)) - return -EINVAL; + return false; if (!info->bitmask) - return -EINVAL; - return 0; + return false; + return true; } -static struct ebt_match filter_mark __read_mostly = { - .name = EBT_MARK_MATCH, - .match = ebt_filter_mark, - .check = ebt_mark_check, +static struct xt_match ebt_mark_mt_reg __read_mostly = { + .name = "mark_m", + .revision = 0, + .family = NFPROTO_BRIDGE, + .match = ebt_mark_mt, + .checkentry = ebt_mark_mt_check, + .matchsize = XT_ALIGN(sizeof(struct ebt_mark_m_info)), .me = THIS_MODULE, }; static int __init ebt_mark_m_init(void) { - return ebt_register_match(&filter_mark); + return xt_register_match(&ebt_mark_mt_reg); } static void __exit ebt_mark_m_fini(void) { - ebt_unregister_match(&filter_mark); + xt_unregister_match(&ebt_mark_mt_reg); } module_init(ebt_mark_m_init); diff --git a/net/bridge/netfilter/ebt_nflog.c b/net/bridge/netfilter/ebt_nflog.c index 8e799aa9e560..2a63d996dd4e 100644 --- a/net/bridge/netfilter/ebt_nflog.c +++ b/net/bridge/netfilter/ebt_nflog.c @@ -14,17 +14,15 @@ #include <linux/module.h> #include <linux/spinlock.h> +#include <linux/netfilter/x_tables.h> #include <linux/netfilter_bridge/ebtables.h> #include <linux/netfilter_bridge/ebt_nflog.h> #include <net/netfilter/nf_log.h> -static void ebt_nflog(const struct sk_buff *skb, - unsigned int hooknr, - const struct net_device *in, - const struct net_device *out, - const void *data, unsigned int datalen) +static unsigned int +ebt_nflog_tg(struct sk_buff *skb, const struct xt_target_param *par) { - struct ebt_nflog_info *info = (struct ebt_nflog_info *)data; + const struct ebt_nflog_info *info = par->targinfo; struct nf_loginfo li; li.type = NF_LOG_TYPE_ULOG; @@ -32,39 +30,39 @@ static void ebt_nflog(const struct sk_buff *skb, li.u.ulog.group = info->group; li.u.ulog.qthreshold = info->threshold; - nf_log_packet(PF_BRIDGE, hooknr, skb, in, out, &li, "%s", info->prefix); + nf_log_packet(PF_BRIDGE, par->hooknum, skb, par->in, par->out, + &li, "%s", info->prefix); + return EBT_CONTINUE; } -static int ebt_nflog_check(const char *tablename, - unsigned int hookmask, - const struct ebt_entry *e, - void *data, unsigned int datalen) +static bool ebt_nflog_tg_check(const struct xt_tgchk_param *par) { - struct ebt_nflog_info *info = (struct ebt_nflog_info *)data; + struct ebt_nflog_info *info = par->targinfo; - if (datalen != EBT_ALIGN(sizeof(struct ebt_nflog_info))) - return -EINVAL; if (info->flags & ~EBT_NFLOG_MASK) - return -EINVAL; + return false; info->prefix[EBT_NFLOG_PREFIX_SIZE - 1] = '\0'; - return 0; + return true; } -static struct ebt_watcher nflog __read_mostly = { - .name = EBT_NFLOG_WATCHER, - .watcher = ebt_nflog, - .check = ebt_nflog_check, - .me = THIS_MODULE, +static struct xt_target ebt_nflog_tg_reg __read_mostly = { + .name = "nflog", + .revision = 0, + .family = NFPROTO_BRIDGE, + .target = ebt_nflog_tg, + .checkentry = ebt_nflog_tg_check, + .targetsize = XT_ALIGN(sizeof(struct ebt_nflog_info)), + .me = THIS_MODULE, }; static int __init ebt_nflog_init(void) { - return ebt_register_watcher(&nflog); + return xt_register_target(&ebt_nflog_tg_reg); } static void __exit ebt_nflog_fini(void) { - ebt_unregister_watcher(&nflog); + xt_unregister_target(&ebt_nflog_tg_reg); } module_init(ebt_nflog_init); diff --git a/net/bridge/netfilter/ebt_pkttype.c b/net/bridge/netfilter/ebt_pkttype.c index 676db32df3d1..883e96e2a542 100644 --- a/net/bridge/netfilter/ebt_pkttype.c +++ b/net/bridge/netfilter/ebt_pkttype.c @@ -7,50 +7,47 @@ * April, 2003 * */ - +#include <linux/module.h> +#include <linux/netfilter/x_tables.h> #include <linux/netfilter_bridge/ebtables.h> #include <linux/netfilter_bridge/ebt_pkttype.h> -#include <linux/module.h> -static int ebt_filter_pkttype(const struct sk_buff *skb, - const struct net_device *in, - const struct net_device *out, - const void *data, - unsigned int datalen) +static bool +ebt_pkttype_mt(const struct sk_buff *skb, const struct xt_match_param *par) { - const struct ebt_pkttype_info *info = data; + const struct ebt_pkttype_info *info = par->matchinfo; - return (skb->pkt_type != info->pkt_type) ^ info->invert; + return (skb->pkt_type == info->pkt_type) ^ info->invert; } -static int ebt_pkttype_check(const char *tablename, unsigned int hookmask, - const struct ebt_entry *e, void *data, unsigned int datalen) +static bool ebt_pkttype_mt_check(const struct xt_mtchk_param *par) { - const struct ebt_pkttype_info *info = data; + const struct ebt_pkttype_info *info = par->matchinfo; - if (datalen != EBT_ALIGN(sizeof(struct ebt_pkttype_info))) - return -EINVAL; if (info->invert != 0 && info->invert != 1) - return -EINVAL; + return false; /* Allow any pkt_type value */ - return 0; + return true; } -static struct ebt_match filter_pkttype __read_mostly = { - .name = EBT_PKTTYPE_MATCH, - .match = ebt_filter_pkttype, - .check = ebt_pkttype_check, +static struct xt_match ebt_pkttype_mt_reg __read_mostly = { + .name = "pkttype", + .revision = 0, + .family = NFPROTO_BRIDGE, + .match = ebt_pkttype_mt, + .checkentry = ebt_pkttype_mt_check, + .matchsize = XT_ALIGN(sizeof(struct ebt_pkttype_info)), .me = THIS_MODULE, }; static int __init ebt_pkttype_init(void) { - return ebt_register_match(&filter_pkttype); + return xt_register_match(&ebt_pkttype_mt_reg); } static void __exit ebt_pkttype_fini(void) { - ebt_unregister_match(&filter_pkttype); + xt_unregister_match(&ebt_pkttype_mt_reg); } module_init(ebt_pkttype_init); diff --git a/net/bridge/netfilter/ebt_redirect.c b/net/bridge/netfilter/ebt_redirect.c index b8afe850cf1e..c8a49f7a57ba 100644 --- a/net/bridge/netfilter/ebt_redirect.c +++ b/net/bridge/netfilter/ebt_redirect.c @@ -7,65 +7,70 @@ * April, 2002 * */ - -#include <linux/netfilter.h> -#include <linux/netfilter_bridge/ebtables.h> -#include <linux/netfilter_bridge/ebt_redirect.h> #include <linux/module.h> #include <net/sock.h> #include "../br_private.h" +#include <linux/netfilter.h> +#include <linux/netfilter/x_tables.h> +#include <linux/netfilter_bridge/ebtables.h> +#include <linux/netfilter_bridge/ebt_redirect.h> -static int ebt_target_redirect(struct sk_buff *skb, unsigned int hooknr, - const struct net_device *in, const struct net_device *out, - const void *data, unsigned int datalen) +static unsigned int +ebt_redirect_tg(struct sk_buff *skb, const struct xt_target_param *par) { - const struct ebt_redirect_info *info = data; + const struct ebt_redirect_info *info = par->targinfo; if (!skb_make_writable(skb, 0)) return EBT_DROP; - if (hooknr != NF_BR_BROUTING) + if (par->hooknum != NF_BR_BROUTING) memcpy(eth_hdr(skb)->h_dest, - in->br_port->br->dev->dev_addr, ETH_ALEN); + par->in->br_port->br->dev->dev_addr, ETH_ALEN); else - memcpy(eth_hdr(skb)->h_dest, in->dev_addr, ETH_ALEN); + memcpy(eth_hdr(skb)->h_dest, par->in->dev_addr, ETH_ALEN); skb->pkt_type = PACKET_HOST; return info->target; } -static int ebt_target_redirect_check(const char *tablename, unsigned int hookmask, - const struct ebt_entry *e, void *data, unsigned int datalen) +static bool ebt_redirect_tg_check(const struct xt_tgchk_param *par) { - const struct ebt_redirect_info *info = data; + const struct ebt_redirect_info *info = par->targinfo; + unsigned int hook_mask; - if (datalen != EBT_ALIGN(sizeof(struct ebt_redirect_info))) - return -EINVAL; if (BASE_CHAIN && info->target == EBT_RETURN) - return -EINVAL; - CLEAR_BASE_CHAIN_BIT; - if ( (strcmp(tablename, "nat") || hookmask & ~(1 << NF_BR_PRE_ROUTING)) && - (strcmp(tablename, "broute") || hookmask & ~(1 << NF_BR_BROUTING)) ) - return -EINVAL; + return false; + + hook_mask = par->hook_mask & ~(1 << NF_BR_NUMHOOKS); + if ((strcmp(par->table, "nat") != 0 || + hook_mask & ~(1 << NF_BR_PRE_ROUTING)) && + (strcmp(par->table, "broute") != 0 || + hook_mask & ~(1 << NF_BR_BROUTING))) + return false; if (INVALID_TARGET) - return -EINVAL; - return 0; + return false; + return true; } -static struct ebt_target redirect_target __read_mostly = { - .name = EBT_REDIRECT_TARGET, - .target = ebt_target_redirect, - .check = ebt_target_redirect_check, +static struct xt_target ebt_redirect_tg_reg __read_mostly = { + .name = "redirect", + .revision = 0, + .family = NFPROTO_BRIDGE, + .hooks = (1 << NF_BR_NUMHOOKS) | (1 << NF_BR_PRE_ROUTING) | + (1 << NF_BR_BROUTING), + .target = ebt_redirect_tg, + .checkentry = ebt_redirect_tg_check, + .targetsize = XT_ALIGN(sizeof(struct ebt_redirect_info)), .me = THIS_MODULE, }; static int __init ebt_redirect_init(void) { - return ebt_register_target(&redirect_target); + return xt_register_target(&ebt_redirect_tg_reg); } static void __exit ebt_redirect_fini(void) { - ebt_unregister_target(&redirect_target); + xt_unregister_target(&ebt_redirect_tg_reg); } module_init(ebt_redirect_init); diff --git a/net/bridge/netfilter/ebt_snat.c b/net/bridge/netfilter/ebt_snat.c index 5425333dda03..8d04d4c302bd 100644 --- a/net/bridge/netfilter/ebt_snat.c +++ b/net/bridge/netfilter/ebt_snat.c @@ -7,20 +7,19 @@ * June, 2002 * */ - -#include <linux/netfilter.h> -#include <linux/netfilter_bridge/ebtables.h> -#include <linux/netfilter_bridge/ebt_nat.h> #include <linux/module.h> #include <net/sock.h> #include <linux/if_arp.h> #include <net/arp.h> +#include <linux/netfilter.h> +#include <linux/netfilter/x_tables.h> +#include <linux/netfilter_bridge/ebtables.h> +#include <linux/netfilter_bridge/ebt_nat.h> -static int ebt_target_snat(struct sk_buff *skb, unsigned int hooknr, - const struct net_device *in, const struct net_device *out, - const void *data, unsigned int datalen) +static unsigned int +ebt_snat_tg(struct sk_buff *skb, const struct xt_target_param *par) { - const struct ebt_nat_info *info = data; + const struct ebt_nat_info *info = par->targinfo; if (!skb_make_writable(skb, 0)) return EBT_DROP; @@ -43,46 +42,43 @@ out: return info->target | ~EBT_VERDICT_BITS; } -static int ebt_target_snat_check(const char *tablename, unsigned int hookmask, - const struct ebt_entry *e, void *data, unsigned int datalen) +static bool ebt_snat_tg_check(const struct xt_tgchk_param *par) { - const struct ebt_nat_info *info = data; + const struct ebt_nat_info *info = par->targinfo; int tmp; - if (datalen != EBT_ALIGN(sizeof(struct ebt_nat_info))) - return -EINVAL; tmp = info->target | ~EBT_VERDICT_BITS; if (BASE_CHAIN && tmp == EBT_RETURN) - return -EINVAL; - CLEAR_BASE_CHAIN_BIT; - if (strcmp(tablename, "nat")) - return -EINVAL; - if (hookmask & ~(1 << NF_BR_POST_ROUTING)) - return -EINVAL; + return false; if (tmp < -NUM_STANDARD_TARGETS || tmp >= 0) - return -EINVAL; + return false; tmp = info->target | EBT_VERDICT_BITS; if ((tmp & ~NAT_ARP_BIT) != ~NAT_ARP_BIT) - return -EINVAL; - return 0; + return false; + return true; } -static struct ebt_target snat __read_mostly = { - .name = EBT_SNAT_TARGET, - .target = ebt_target_snat, - .check = ebt_target_snat_check, +static struct xt_target ebt_snat_tg_reg __read_mostly = { + .name = "snat", + .revision = 0, + .family = NFPROTO_BRIDGE, + .table = "nat", + .hooks = (1 << NF_BR_NUMHOOKS) | (1 << NF_BR_POST_ROUTING), + .target = ebt_snat_tg, + .checkentry = ebt_snat_tg_check, + .targetsize = XT_ALIGN(sizeof(struct ebt_nat_info)), .me = THIS_MODULE, }; static int __init ebt_snat_init(void) { - return ebt_register_target(&snat); + return xt_register_target(&ebt_snat_tg_reg); } static void __exit ebt_snat_fini(void) { - ebt_unregister_target(&snat); + xt_unregister_target(&ebt_snat_tg_reg); } module_init(ebt_snat_init); diff --git a/net/bridge/netfilter/ebt_stp.c b/net/bridge/netfilter/ebt_stp.c index 40f36d37607d..48527e621626 100644 --- a/net/bridge/netfilter/ebt_stp.c +++ b/net/bridge/netfilter/ebt_stp.c @@ -7,11 +7,11 @@ * * July, 2003 */ - -#include <linux/netfilter_bridge/ebtables.h> -#include <linux/netfilter_bridge/ebt_stp.h> #include <linux/etherdevice.h> #include <linux/module.h> +#include <linux/netfilter/x_tables.h> +#include <linux/netfilter_bridge/ebtables.h> +#include <linux/netfilter_bridge/ebt_stp.h> #define BPDU_TYPE_CONFIG 0 #define BPDU_TYPE_TCN 0x80 @@ -40,7 +40,7 @@ struct stp_config_pdu { #define NR16(p) (p[0] << 8 | p[1]) #define NR32(p) ((p[0] << 24) | (p[1] << 16) | (p[2] << 8) | p[3]) -static int ebt_filter_config(const struct ebt_stp_info *info, +static bool ebt_filter_config(const struct ebt_stp_info *info, const struct stp_config_pdu *stpc) { const struct ebt_stp_config_info *c; @@ -51,12 +51,12 @@ static int ebt_filter_config(const struct ebt_stp_info *info, c = &info->config; if ((info->bitmask & EBT_STP_FLAGS) && FWINV(c->flags != stpc->flags, EBT_STP_FLAGS)) - return EBT_NOMATCH; + return false; if (info->bitmask & EBT_STP_ROOTPRIO) { v16 = NR16(stpc->root); if (FWINV(v16 < c->root_priol || v16 > c->root_priou, EBT_STP_ROOTPRIO)) - return EBT_NOMATCH; + return false; } if (info->bitmask & EBT_STP_ROOTADDR) { verdict = 0; @@ -64,19 +64,19 @@ static int ebt_filter_config(const struct ebt_stp_info *info, verdict |= (stpc->root[2+i] ^ c->root_addr[i]) & c->root_addrmsk[i]; if (FWINV(verdict != 0, EBT_STP_ROOTADDR)) - return EBT_NOMATCH; + return false; } if (info->bitmask & EBT_STP_ROOTCOST) { v32 = NR32(stpc->root_cost); if (FWINV(v32 < c->root_costl || v32 > c->root_costu, EBT_STP_ROOTCOST)) - return EBT_NOMATCH; + return false; } if (info->bitmask & EBT_STP_SENDERPRIO) { v16 = NR16(stpc->sender); if (FWINV(v16 < c->sender_priol || v16 > c->sender_priou, EBT_STP_SENDERPRIO)) - return EBT_NOMATCH; + return false; } if (info->bitmask & EBT_STP_SENDERADDR) { verdict = 0; @@ -84,60 +84,60 @@ static int ebt_filter_config(const struct ebt_stp_info *info, verdict |= (stpc->sender[2+i] ^ c->sender_addr[i]) & c->sender_addrmsk[i]; if (FWINV(verdict != 0, EBT_STP_SENDERADDR)) - return EBT_NOMATCH; + return false; } if (info->bitmask & EBT_STP_PORT) { v16 = NR16(stpc->port); if (FWINV(v16 < c->portl || v16 > c->portu, EBT_STP_PORT)) - return EBT_NOMATCH; + return false; } if (info->bitmask & EBT_STP_MSGAGE) { v16 = NR16(stpc->msg_age); if (FWINV(v16 < c->msg_agel || v16 > c->msg_ageu, EBT_STP_MSGAGE)) - return EBT_NOMATCH; + return false; } if (info->bitmask & EBT_STP_MAXAGE) { v16 = NR16(stpc->max_age); if (FWINV(v16 < c->max_agel || v16 > c->max_ageu, EBT_STP_MAXAGE)) - return EBT_NOMATCH; + return false; } if (info->bitmask & EBT_STP_HELLOTIME) { v16 = NR16(stpc->hello_time); if (FWINV(v16 < c->hello_timel || v16 > c->hello_timeu, EBT_STP_HELLOTIME)) - return EBT_NOMATCH; + return false; } if (info->bitmask & EBT_STP_FWDD) { v16 = NR16(stpc->forward_delay); if (FWINV(v16 < c->forward_delayl || v16 > c->forward_delayu, EBT_STP_FWDD)) - return EBT_NOMATCH; + return false; } - return EBT_MATCH; + return true; } -static int ebt_filter_stp(const struct sk_buff *skb, const struct net_device *in, - const struct net_device *out, const void *data, unsigned int datalen) +static bool +ebt_stp_mt(const struct sk_buff *skb, const struct xt_match_param *par) { - const struct ebt_stp_info *info = data; + const struct ebt_stp_info *info = par->matchinfo; const struct stp_header *sp; struct stp_header _stph; const uint8_t header[6] = {0x42, 0x42, 0x03, 0x00, 0x00, 0x00}; sp = skb_header_pointer(skb, 0, sizeof(_stph), &_stph); if (sp == NULL) - return EBT_NOMATCH; + return false; /* The stp code only considers these */ if (memcmp(sp, header, sizeof(header))) - return EBT_NOMATCH; + return false; if (info->bitmask & EBT_STP_TYPE && FWINV(info->type != sp->type, EBT_STP_TYPE)) - return EBT_NOMATCH; + return false; if (sp->type == BPDU_TYPE_CONFIG && info->bitmask & EBT_STP_CONFIG_MASK) { @@ -147,48 +147,48 @@ static int ebt_filter_stp(const struct sk_buff *skb, const struct net_device *in st = skb_header_pointer(skb, sizeof(_stph), sizeof(_stpc), &_stpc); if (st == NULL) - return EBT_NOMATCH; + return false; return ebt_filter_config(info, st); } - return EBT_MATCH; + return true; } -static int ebt_stp_check(const char *tablename, unsigned int hookmask, - const struct ebt_entry *e, void *data, unsigned int datalen) +static bool ebt_stp_mt_check(const struct xt_mtchk_param *par) { - const struct ebt_stp_info *info = data; - const unsigned int len = EBT_ALIGN(sizeof(struct ebt_stp_info)); + const struct ebt_stp_info *info = par->matchinfo; const uint8_t bridge_ula[6] = {0x01, 0x80, 0xc2, 0x00, 0x00, 0x00}; const uint8_t msk[6] = {0xff, 0xff, 0xff, 0xff, 0xff, 0xff}; + const struct ebt_entry *e = par->entryinfo; if (info->bitmask & ~EBT_STP_MASK || info->invflags & ~EBT_STP_MASK || !(info->bitmask & EBT_STP_MASK)) - return -EINVAL; - if (datalen != len) - return -EINVAL; + return false; /* Make sure the match only receives stp frames */ if (compare_ether_addr(e->destmac, bridge_ula) || compare_ether_addr(e->destmsk, msk) || !(e->bitmask & EBT_DESTMAC)) - return -EINVAL; + return false; - return 0; + return true; } -static struct ebt_match filter_stp __read_mostly = { - .name = EBT_STP_MATCH, - .match = ebt_filter_stp, - .check = ebt_stp_check, +static struct xt_match ebt_stp_mt_reg __read_mostly = { + .name = "stp", + .revision = 0, + .family = NFPROTO_BRIDGE, + .match = ebt_stp_mt, + .checkentry = ebt_stp_mt_check, + .matchsize = XT_ALIGN(sizeof(struct ebt_stp_info)), .me = THIS_MODULE, }; static int __init ebt_stp_init(void) { - return ebt_register_match(&filter_stp); + return xt_register_match(&ebt_stp_mt_reg); } static void __exit ebt_stp_fini(void) { - ebt_unregister_match(&filter_stp); + xt_unregister_match(&ebt_stp_mt_reg); } module_init(ebt_stp_init); diff --git a/net/bridge/netfilter/ebt_ulog.c b/net/bridge/netfilter/ebt_ulog.c index 2d4c9ef909fc..2c6d6823e703 100644 --- a/net/bridge/netfilter/ebt_ulog.c +++ b/net/bridge/netfilter/ebt_ulog.c @@ -36,6 +36,7 @@ #include <linux/timer.h> #include <linux/netlink.h> #include <linux/netdevice.h> +#include <linux/netfilter/x_tables.h> #include <linux/netfilter_bridge/ebtables.h> #include <linux/netfilter_bridge/ebt_ulog.h> #include <net/netfilter/nf_log.h> @@ -223,7 +224,7 @@ alloc_failure: } /* this function is registered with the netfilter core */ -static void ebt_log_packet(unsigned int pf, unsigned int hooknum, +static void ebt_log_packet(u_int8_t pf, unsigned int hooknum, const struct sk_buff *skb, const struct net_device *in, const struct net_device *out, const struct nf_loginfo *li, const char *prefix) @@ -245,24 +246,20 @@ static void ebt_log_packet(unsigned int pf, unsigned int hooknum, ebt_ulog_packet(hooknum, skb, in, out, &loginfo, prefix); } -static void ebt_ulog(const struct sk_buff *skb, unsigned int hooknr, - const struct net_device *in, const struct net_device *out, - const void *data, unsigned int datalen) +static unsigned int +ebt_ulog_tg(struct sk_buff *skb, const struct xt_target_param *par) { - const struct ebt_ulog_info *uloginfo = data; - - ebt_ulog_packet(hooknr, skb, in, out, uloginfo, NULL); + ebt_ulog_packet(par->hooknum, skb, par->in, par->out, + par->targinfo, NULL); + return EBT_CONTINUE; } - -static int ebt_ulog_check(const char *tablename, unsigned int hookmask, - const struct ebt_entry *e, void *data, unsigned int datalen) +static bool ebt_ulog_tg_check(const struct xt_tgchk_param *par) { - struct ebt_ulog_info *uloginfo = data; + struct ebt_ulog_info *uloginfo = par->targinfo; - if (datalen != EBT_ALIGN(sizeof(struct ebt_ulog_info)) || - uloginfo->nlgroup > 31) - return -EINVAL; + if (uloginfo->nlgroup > 31) + return false; uloginfo->prefix[EBT_ULOG_PREFIX_LEN - 1] = '\0'; @@ -272,27 +269,31 @@ static int ebt_ulog_check(const char *tablename, unsigned int hookmask, return 0; } -static struct ebt_watcher ulog __read_mostly = { - .name = EBT_ULOG_WATCHER, - .watcher = ebt_ulog, - .check = ebt_ulog_check, +static struct xt_target ebt_ulog_tg_reg __read_mostly = { + .name = "ulog", + .revision = 0, + .family = NFPROTO_BRIDGE, + .target = ebt_ulog_tg, + .checkentry = ebt_ulog_tg_check, + .targetsize = XT_ALIGN(sizeof(struct ebt_ulog_info)), .me = THIS_MODULE, }; static const struct nf_logger ebt_ulog_logger = { - .name = EBT_ULOG_WATCHER, + .name = "ulog", .logfn = &ebt_log_packet, .me = THIS_MODULE, }; static int __init ebt_ulog_init(void) { - int i, ret = 0; + bool ret = true; + int i; if (nlbufsiz >= 128*1024) { printk(KERN_NOTICE "ebt_ulog: Netlink buffer has to be <= 128kB," " please try a smaller nlbufsiz parameter.\n"); - return -EINVAL; + return false; } /* initialize ulog_buffers */ @@ -304,13 +305,16 @@ static int __init ebt_ulog_init(void) ebtulognl = netlink_kernel_create(&init_net, NETLINK_NFLOG, EBT_ULOG_MAXNLGROUPS, NULL, NULL, THIS_MODULE); - if (!ebtulognl) - ret = -ENOMEM; - else if ((ret = ebt_register_watcher(&ulog))) + if (!ebtulognl) { + printk(KERN_WARNING KBUILD_MODNAME ": out of memory trying to " + "call netlink_kernel_create\n"); + ret = false; + } else if (xt_register_target(&ebt_ulog_tg_reg) != 0) { netlink_kernel_release(ebtulognl); + } - if (ret == 0) - nf_log_register(PF_BRIDGE, &ebt_ulog_logger); + if (ret) + nf_log_register(NFPROTO_BRIDGE, &ebt_ulog_logger); return ret; } @@ -321,7 +325,7 @@ static void __exit ebt_ulog_fini(void) int i; nf_log_unregister(&ebt_ulog_logger); - ebt_unregister_watcher(&ulog); + xt_unregister_target(&ebt_ulog_tg_reg); for (i = 0; i < EBT_ULOG_MAXNLGROUPS; i++) { ub = &ulog_buffers[i]; if (timer_pending(&ub->timer)) diff --git a/net/bridge/netfilter/ebt_vlan.c b/net/bridge/netfilter/ebt_vlan.c index ab60b0dade80..3dddd489328e 100644 --- a/net/bridge/netfilter/ebt_vlan.c +++ b/net/bridge/netfilter/ebt_vlan.c @@ -22,6 +22,7 @@ #include <linux/if_vlan.h> #include <linux/module.h> #include <linux/moduleparam.h> +#include <linux/netfilter/x_tables.h> #include <linux/netfilter_bridge/ebtables.h> #include <linux/netfilter_bridge/ebt_vlan.h> @@ -37,15 +38,12 @@ MODULE_LICENSE("GPL"); #define DEBUG_MSG(args...) if (debug) printk (KERN_DEBUG "ebt_vlan: " args) #define GET_BITMASK(_BIT_MASK_) info->bitmask & _BIT_MASK_ -#define EXIT_ON_MISMATCH(_MATCH_,_MASK_) {if (!((info->_MATCH_ == _MATCH_)^!!(info->invflags & _MASK_))) return EBT_NOMATCH;} +#define EXIT_ON_MISMATCH(_MATCH_,_MASK_) {if (!((info->_MATCH_ == _MATCH_)^!!(info->invflags & _MASK_))) return false; } -static int -ebt_filter_vlan(const struct sk_buff *skb, - const struct net_device *in, - const struct net_device *out, - const void *data, unsigned int datalen) +static bool +ebt_vlan_mt(const struct sk_buff *skb, const struct xt_match_param *par) { - const struct ebt_vlan_info *info = data; + const struct ebt_vlan_info *info = par->matchinfo; const struct vlan_hdr *fp; struct vlan_hdr _frame; @@ -57,7 +55,7 @@ ebt_filter_vlan(const struct sk_buff *skb, fp = skb_header_pointer(skb, 0, sizeof(_frame), &_frame); if (fp == NULL) - return EBT_NOMATCH; + return false; /* Tag Control Information (TCI) consists of the following elements: * - User_priority. The user_priority field is three bits in length, @@ -83,30 +81,20 @@ ebt_filter_vlan(const struct sk_buff *skb, if (GET_BITMASK(EBT_VLAN_ENCAP)) EXIT_ON_MISMATCH(encap, EBT_VLAN_ENCAP); - return EBT_MATCH; + return true; } -static int -ebt_check_vlan(const char *tablename, - unsigned int hooknr, - const struct ebt_entry *e, void *data, unsigned int datalen) +static bool ebt_vlan_mt_check(const struct xt_mtchk_param *par) { - struct ebt_vlan_info *info = data; - - /* Parameters buffer overflow check */ - if (datalen != EBT_ALIGN(sizeof(struct ebt_vlan_info))) { - DEBUG_MSG - ("passed size %d is not eq to ebt_vlan_info (%Zd)\n", - datalen, sizeof(struct ebt_vlan_info)); - return -EINVAL; - } + struct ebt_vlan_info *info = par->matchinfo; + const struct ebt_entry *e = par->entryinfo; /* Is it 802.1Q frame checked? */ if (e->ethproto != htons(ETH_P_8021Q)) { DEBUG_MSG ("passed entry proto %2.4X is not 802.1Q (8100)\n", (unsigned short) ntohs(e->ethproto)); - return -EINVAL; + return false; } /* Check for bitmask range @@ -114,14 +102,14 @@ ebt_check_vlan(const char *tablename, if (info->bitmask & ~EBT_VLAN_MASK) { DEBUG_MSG("bitmask %2X is out of mask (%2X)\n", info->bitmask, EBT_VLAN_MASK); - return -EINVAL; + return false; } /* Check for inversion flags range */ if (info->invflags & ~EBT_VLAN_MASK) { DEBUG_MSG("inversion flags %2X is out of mask (%2X)\n", info->invflags, EBT_VLAN_MASK); - return -EINVAL; + return false; } /* Reserved VLAN ID (VID) values @@ -136,7 +124,7 @@ ebt_check_vlan(const char *tablename, DEBUG_MSG ("id %d is out of range (1-4096)\n", info->id); - return -EINVAL; + return false; } /* Note: This is valid VLAN-tagged frame point. * Any value of user_priority are acceptable, @@ -151,7 +139,7 @@ ebt_check_vlan(const char *tablename, if ((unsigned char) info->prio > 7) { DEBUG_MSG("prio %d is out of range (0-7)\n", info->prio); - return -EINVAL; + return false; } } /* Check for encapsulated proto range - it is possible to be @@ -162,17 +150,20 @@ ebt_check_vlan(const char *tablename, DEBUG_MSG ("encap frame length %d is less than minimal\n", ntohs(info->encap)); - return -EINVAL; + return false; } } - return 0; + return true; } -static struct ebt_match filter_vlan __read_mostly = { - .name = EBT_VLAN_MATCH, - .match = ebt_filter_vlan, - .check = ebt_check_vlan, +static struct xt_match ebt_vlan_mt_reg __read_mostly = { + .name = "vlan", + .revision = 0, + .family = NFPROTO_BRIDGE, + .match = ebt_vlan_mt, + .checkentry = ebt_vlan_mt_check, + .matchsize = XT_ALIGN(sizeof(struct ebt_vlan_info)), .me = THIS_MODULE, }; @@ -181,12 +172,12 @@ static int __init ebt_vlan_init(void) DEBUG_MSG("ebtables 802.1Q extension module v" MODULE_VERS "\n"); DEBUG_MSG("module debug=%d\n", !!debug); - return ebt_register_match(&filter_vlan); + return xt_register_match(&ebt_vlan_mt_reg); } static void __exit ebt_vlan_fini(void) { - ebt_unregister_match(&filter_vlan); + xt_unregister_match(&ebt_vlan_mt_reg); } module_init(ebt_vlan_init); diff --git a/net/bridge/netfilter/ebtables.c b/net/bridge/netfilter/ebtables.c index 32afff859e4a..5bb88eb0aad4 100644 --- a/net/bridge/netfilter/ebtables.c +++ b/net/bridge/netfilter/ebtables.c @@ -19,6 +19,7 @@ #include <linux/kmod.h> #include <linux/module.h> #include <linux/vmalloc.h> +#include <linux/netfilter/x_tables.h> #include <linux/netfilter_bridge/ebtables.h> #include <linux/spinlock.h> #include <linux/mutex.h> @@ -55,29 +56,31 @@ static DEFINE_MUTEX(ebt_mutex); static LIST_HEAD(ebt_tables); -static LIST_HEAD(ebt_targets); -static LIST_HEAD(ebt_matches); -static LIST_HEAD(ebt_watchers); -static struct ebt_target ebt_standard_target = -{ {NULL, NULL}, EBT_STANDARD_TARGET, NULL, NULL, NULL, NULL}; +static struct xt_target ebt_standard_target = { + .name = "standard", + .revision = 0, + .family = NFPROTO_BRIDGE, + .targetsize = sizeof(int), +}; -static inline int ebt_do_watcher (struct ebt_entry_watcher *w, - const struct sk_buff *skb, unsigned int hooknr, const struct net_device *in, - const struct net_device *out) +static inline int +ebt_do_watcher(const struct ebt_entry_watcher *w, struct sk_buff *skb, + struct xt_target_param *par) { - w->u.watcher->watcher(skb, hooknr, in, out, w->data, - w->watcher_size); + par->target = w->u.watcher; + par->targinfo = w->data; + w->u.watcher->target(skb, par); /* watchers don't give a verdict */ return 0; } static inline int ebt_do_match (struct ebt_entry_match *m, - const struct sk_buff *skb, const struct net_device *in, - const struct net_device *out) + const struct sk_buff *skb, struct xt_match_param *par) { - return m->u.match->match(skb, in, out, m->data, - m->match_size); + par->match = m->u.match; + par->matchinfo = m->data; + return m->u.match->match(skb, par); } static inline int ebt_dev_check(char *entry, const struct net_device *device) @@ -153,6 +156,15 @@ unsigned int ebt_do_table (unsigned int hook, struct sk_buff *skb, struct ebt_entries *chaininfo; char *base; struct ebt_table_info *private; + bool hotdrop = false; + struct xt_match_param mtpar; + struct xt_target_param tgpar; + + mtpar.family = tgpar.family = NFPROTO_BRIDGE; + mtpar.in = tgpar.in = in; + mtpar.out = tgpar.out = out; + mtpar.hotdrop = &hotdrop; + tgpar.hooknum = hook; read_lock_bh(&table->lock); private = table->private; @@ -173,8 +185,12 @@ unsigned int ebt_do_table (unsigned int hook, struct sk_buff *skb, if (ebt_basic_match(point, eth_hdr(skb), in, out)) goto letscontinue; - if (EBT_MATCH_ITERATE(point, ebt_do_match, skb, in, out) != 0) + if (EBT_MATCH_ITERATE(point, ebt_do_match, skb, &mtpar) != 0) goto letscontinue; + if (hotdrop) { + read_unlock_bh(&table->lock); + return NF_DROP; + } /* increase counter */ (*(counter_base + i)).pcnt++; @@ -182,17 +198,18 @@ unsigned int ebt_do_table (unsigned int hook, struct sk_buff *skb, /* these should only watch: not modify, nor tell us what to do with the packet */ - EBT_WATCHER_ITERATE(point, ebt_do_watcher, skb, hook, in, - out); + EBT_WATCHER_ITERATE(point, ebt_do_watcher, skb, &tgpar); t = (struct ebt_entry_target *) (((char *)point) + point->target_offset); /* standard target */ if (!t->u.target->target) verdict = ((struct ebt_standard_target *)t)->verdict; - else - verdict = t->u.target->target(skb, hook, - in, out, t->data, t->target_size); + else { + tgpar.target = t->u.target; + tgpar.targinfo = t->data; + verdict = t->u.target->target(skb, &tgpar); + } if (verdict == EBT_ACCEPT) { read_unlock_bh(&table->lock); return NF_ACCEPT; @@ -312,80 +329,71 @@ find_table_lock(const char *name, int *error, struct mutex *mutex) return find_inlist_lock(&ebt_tables, name, "ebtable_", error, mutex); } -static inline struct ebt_match * -find_match_lock(const char *name, int *error, struct mutex *mutex) -{ - return find_inlist_lock(&ebt_matches, name, "ebt_", error, mutex); -} - -static inline struct ebt_watcher * -find_watcher_lock(const char *name, int *error, struct mutex *mutex) -{ - return find_inlist_lock(&ebt_watchers, name, "ebt_", error, mutex); -} - -static inline struct ebt_target * -find_target_lock(const char *name, int *error, struct mutex *mutex) -{ - return find_inlist_lock(&ebt_targets, name, "ebt_", error, mutex); -} - static inline int -ebt_check_match(struct ebt_entry_match *m, struct ebt_entry *e, - const char *name, unsigned int hookmask, unsigned int *cnt) +ebt_check_match(struct ebt_entry_match *m, struct xt_mtchk_param *par, + unsigned int *cnt) { - struct ebt_match *match; + const struct ebt_entry *e = par->entryinfo; + struct xt_match *match; size_t left = ((char *)e + e->watchers_offset) - (char *)m; int ret; if (left < sizeof(struct ebt_entry_match) || left - sizeof(struct ebt_entry_match) < m->match_size) return -EINVAL; - match = find_match_lock(m->u.name, &ret, &ebt_mutex); - if (!match) - return ret; - m->u.match = match; - if (!try_module_get(match->me)) { - mutex_unlock(&ebt_mutex); + + match = try_then_request_module(xt_find_match(NFPROTO_BRIDGE, + m->u.name, 0), "ebt_%s", m->u.name); + if (IS_ERR(match)) + return PTR_ERR(match); + if (match == NULL) return -ENOENT; - } - mutex_unlock(&ebt_mutex); - if (match->check && - match->check(name, hookmask, e, m->data, m->match_size) != 0) { - BUGPRINT("match->check failed\n"); + m->u.match = match; + + par->match = match; + par->matchinfo = m->data; + ret = xt_check_match(par, m->match_size, + e->ethproto, e->invflags & EBT_IPROTO); + if (ret < 0) { module_put(match->me); - return -EINVAL; + return ret; } + (*cnt)++; return 0; } static inline int -ebt_check_watcher(struct ebt_entry_watcher *w, struct ebt_entry *e, - const char *name, unsigned int hookmask, unsigned int *cnt) +ebt_check_watcher(struct ebt_entry_watcher *w, struct xt_tgchk_param *par, + unsigned int *cnt) { - struct ebt_watcher *watcher; + const struct ebt_entry *e = par->entryinfo; + struct xt_target *watcher; size_t left = ((char *)e + e->target_offset) - (char *)w; int ret; if (left < sizeof(struct ebt_entry_watcher) || left - sizeof(struct ebt_entry_watcher) < w->watcher_size) return -EINVAL; - watcher = find_watcher_lock(w->u.name, &ret, &ebt_mutex); - if (!watcher) - return ret; - w->u.watcher = watcher; - if (!try_module_get(watcher->me)) { - mutex_unlock(&ebt_mutex); + + watcher = try_then_request_module( + xt_find_target(NFPROTO_BRIDGE, w->u.name, 0), + "ebt_%s", w->u.name); + if (IS_ERR(watcher)) + return PTR_ERR(watcher); + if (watcher == NULL) return -ENOENT; - } - mutex_unlock(&ebt_mutex); - if (watcher->check && - watcher->check(name, hookmask, e, w->data, w->watcher_size) != 0) { - BUGPRINT("watcher->check failed\n"); + w->u.watcher = watcher; + + par->target = watcher; + par->targinfo = w->data; + ret = xt_check_target(par, w->watcher_size, + e->ethproto, e->invflags & EBT_IPROTO); + if (ret < 0) { module_put(watcher->me); - return -EINVAL; + return ret; } + (*cnt)++; return 0; } @@ -558,30 +566,41 @@ ebt_get_udc_positions(struct ebt_entry *e, struct ebt_table_info *newinfo, static inline int ebt_cleanup_match(struct ebt_entry_match *m, unsigned int *i) { + struct xt_mtdtor_param par; + if (i && (*i)-- == 0) return 1; - if (m->u.match->destroy) - m->u.match->destroy(m->data, m->match_size); - module_put(m->u.match->me); + par.match = m->u.match; + par.matchinfo = m->data; + par.family = NFPROTO_BRIDGE; + if (par.match->destroy != NULL) + par.match->destroy(&par); + module_put(par.match->me); return 0; } static inline int ebt_cleanup_watcher(struct ebt_entry_watcher *w, unsigned int *i) { + struct xt_tgdtor_param par; + if (i && (*i)-- == 0) return 1; - if (w->u.watcher->destroy) - w->u.watcher->destroy(w->data, w->watcher_size); - module_put(w->u.watcher->me); + par.target = w->u.watcher; + par.targinfo = w->data; + par.family = NFPROTO_BRIDGE; + if (par.target->destroy != NULL) + par.target->destroy(&par); + module_put(par.target->me); return 0; } static inline int ebt_cleanup_entry(struct ebt_entry *e, unsigned int *cnt) { + struct xt_tgdtor_param par; struct ebt_entry_target *t; if (e->bitmask == 0) @@ -592,10 +611,13 @@ ebt_cleanup_entry(struct ebt_entry *e, unsigned int *cnt) EBT_WATCHER_ITERATE(e, ebt_cleanup_watcher, NULL); EBT_MATCH_ITERATE(e, ebt_cleanup_match, NULL); t = (struct ebt_entry_target *)(((char *)e) + e->target_offset); - if (t->u.target->destroy) - t->u.target->destroy(t->data, t->target_size); - module_put(t->u.target->me); + par.target = t->u.target; + par.targinfo = t->data; + par.family = NFPROTO_BRIDGE; + if (par.target->destroy != NULL) + par.target->destroy(&par); + module_put(par.target->me); return 0; } @@ -605,10 +627,12 @@ ebt_check_entry(struct ebt_entry *e, struct ebt_table_info *newinfo, struct ebt_cl_stack *cl_s, unsigned int udc_cnt) { struct ebt_entry_target *t; - struct ebt_target *target; + struct xt_target *target; unsigned int i, j, hook = 0, hookmask = 0; size_t gap; int ret; + struct xt_mtchk_param mtpar; + struct xt_tgchk_param tgpar; /* don't mess with the struct ebt_entries */ if (e->bitmask == 0) @@ -649,24 +673,31 @@ ebt_check_entry(struct ebt_entry *e, struct ebt_table_info *newinfo, hookmask = cl_s[i - 1].hookmask; } i = 0; - ret = EBT_MATCH_ITERATE(e, ebt_check_match, e, name, hookmask, &i); + + mtpar.table = tgpar.table = name; + mtpar.entryinfo = tgpar.entryinfo = e; + mtpar.hook_mask = tgpar.hook_mask = hookmask; + mtpar.family = tgpar.family = NFPROTO_BRIDGE; + ret = EBT_MATCH_ITERATE(e, ebt_check_match, &mtpar, &i); if (ret != 0) goto cleanup_matches; j = 0; - ret = EBT_WATCHER_ITERATE(e, ebt_check_watcher, e, name, hookmask, &j); + ret = EBT_WATCHER_ITERATE(e, ebt_check_watcher, &tgpar, &j); if (ret != 0) goto cleanup_watchers; t = (struct ebt_entry_target *)(((char *)e) + e->target_offset); gap = e->next_offset - e->target_offset; - target = find_target_lock(t->u.name, &ret, &ebt_mutex); - if (!target) + + target = try_then_request_module( + xt_find_target(NFPROTO_BRIDGE, t->u.name, 0), + "ebt_%s", t->u.name); + if (IS_ERR(target)) { + ret = PTR_ERR(target); goto cleanup_watchers; - if (!try_module_get(target->me)) { - mutex_unlock(&ebt_mutex); + } else if (target == NULL) { ret = -ENOENT; goto cleanup_watchers; } - mutex_unlock(&ebt_mutex); t->u.target = target; if (t->u.target == &ebt_standard_target) { @@ -681,13 +712,20 @@ ebt_check_entry(struct ebt_entry *e, struct ebt_table_info *newinfo, ret = -EFAULT; goto cleanup_watchers; } - } else if (t->target_size > gap - sizeof(struct ebt_entry_target) || - (t->u.target->check && - t->u.target->check(name, hookmask, e, t->data, t->target_size) != 0)){ + } else if (t->target_size > gap - sizeof(struct ebt_entry_target)) { module_put(t->u.target->me); ret = -EFAULT; goto cleanup_watchers; } + + tgpar.target = target; + tgpar.targinfo = t->data; + ret = xt_check_target(&tgpar, t->target_size, + e->ethproto, e->invflags & EBT_IPROTO); + if (ret < 0) { + module_put(target->me); + goto cleanup_watchers; + } (*cnt)++; return 0; cleanup_watchers: @@ -1068,87 +1106,6 @@ free_newinfo: return ret; } -int ebt_register_target(struct ebt_target *target) -{ - struct ebt_target *t; - int ret; - - ret = mutex_lock_interruptible(&ebt_mutex); - if (ret != 0) - return ret; - list_for_each_entry(t, &ebt_targets, list) { - if (strcmp(t->name, target->name) == 0) { - mutex_unlock(&ebt_mutex); - return -EEXIST; - } - } - list_add(&target->list, &ebt_targets); - mutex_unlock(&ebt_mutex); - - return 0; -} - -void ebt_unregister_target(struct ebt_target *target) -{ - mutex_lock(&ebt_mutex); - list_del(&target->list); - mutex_unlock(&ebt_mutex); -} - -int ebt_register_match(struct ebt_match *match) -{ - struct ebt_match *m; - int ret; - - ret = mutex_lock_interruptible(&ebt_mutex); - if (ret != 0) - return ret; - list_for_each_entry(m, &ebt_matches, list) { - if (strcmp(m->name, match->name) == 0) { - mutex_unlock(&ebt_mutex); - return -EEXIST; - } - } - list_add(&match->list, &ebt_matches); - mutex_unlock(&ebt_mutex); - - return 0; -} - -void ebt_unregister_match(struct ebt_match *match) -{ - mutex_lock(&ebt_mutex); - list_del(&match->list); - mutex_unlock(&ebt_mutex); -} - -int ebt_register_watcher(struct ebt_watcher *watcher) -{ - struct ebt_watcher *w; - int ret; - - ret = mutex_lock_interruptible(&ebt_mutex); - if (ret != 0) - return ret; - list_for_each_entry(w, &ebt_watchers, list) { - if (strcmp(w->name, watcher->name) == 0) { - mutex_unlock(&ebt_mutex); - return -EEXIST; - } - } - list_add(&watcher->list, &ebt_watchers); - mutex_unlock(&ebt_mutex); - - return 0; -} - -void ebt_unregister_watcher(struct ebt_watcher *watcher) -{ - mutex_lock(&ebt_mutex); - list_del(&watcher->list); - mutex_unlock(&ebt_mutex); -} - int ebt_register_table(struct ebt_table *table) { struct ebt_table_info *newinfo; @@ -1518,11 +1475,14 @@ static int __init ebtables_init(void) { int ret; - mutex_lock(&ebt_mutex); - list_add(&ebt_standard_target.list, &ebt_targets); - mutex_unlock(&ebt_mutex); - if ((ret = nf_register_sockopt(&ebt_sockopts)) < 0) + ret = xt_register_target(&ebt_standard_target); + if (ret < 0) + return ret; + ret = nf_register_sockopt(&ebt_sockopts); + if (ret < 0) { + xt_unregister_target(&ebt_standard_target); return ret; + } printk(KERN_INFO "Ebtables v2.0 registered\n"); return 0; @@ -1531,17 +1491,12 @@ static int __init ebtables_init(void) static void __exit ebtables_fini(void) { nf_unregister_sockopt(&ebt_sockopts); + xt_unregister_target(&ebt_standard_target); printk(KERN_INFO "Ebtables v2.0 unregistered\n"); } EXPORT_SYMBOL(ebt_register_table); EXPORT_SYMBOL(ebt_unregister_table); -EXPORT_SYMBOL(ebt_register_match); -EXPORT_SYMBOL(ebt_unregister_match); -EXPORT_SYMBOL(ebt_register_watcher); -EXPORT_SYMBOL(ebt_unregister_watcher); -EXPORT_SYMBOL(ebt_register_target); -EXPORT_SYMBOL(ebt_unregister_target); EXPORT_SYMBOL(ebt_do_table); module_init(ebtables_init); module_exit(ebtables_fini); diff --git a/net/core/Makefile b/net/core/Makefile index b1332f6d0042..26a37cb31923 100644 --- a/net/core/Makefile +++ b/net/core/Makefile @@ -6,6 +6,7 @@ obj-y := sock.o request_sock.o skbuff.o iovec.o datagram.o stream.o scm.o \ gen_stats.o gen_estimator.o net_namespace.o obj-$(CONFIG_SYSCTL) += sysctl_net_core.o +obj-$(CONFIG_HAS_DMA) += skb_dma_map.o obj-y += dev.o ethtool.o dev_mcast.o dst.o netevent.o \ neighbour.o rtnetlink.o utils.o link_watch.o filter.o diff --git a/net/core/datagram.c b/net/core/datagram.c index 52f577a0f544..ee631843c2f5 100644 --- a/net/core/datagram.c +++ b/net/core/datagram.c @@ -9,7 +9,7 @@ * identical recvmsg() code. So we share it here. The poll was * shared before but buried in udp.c so I moved it. * - * Authors: Alan Cox <alan@redhat.com>. (datagram_poll() from old + * Authors: Alan Cox <alan@lxorguk.ukuu.org.uk>. (datagram_poll() from old * udp.c code) * * Fixes: diff --git a/net/core/dev.c b/net/core/dev.c index 60c51f765887..1408a083fe4e 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -122,6 +122,7 @@ #include <linux/if_arp.h> #include <linux/if_vlan.h> #include <linux/ip.h> +#include <net/ip.h> #include <linux/ipv6.h> #include <linux/in.h> #include <linux/jhash.h> @@ -890,7 +891,7 @@ int dev_alloc_name(struct net_device *dev, const char *name) * Change name of a device, can pass format strings "eth%d". * for wildcarding. */ -int dev_change_name(struct net_device *dev, char *newname) +int dev_change_name(struct net_device *dev, const char *newname) { char oldname[IFNAMSIZ]; int err = 0; @@ -916,7 +917,6 @@ int dev_change_name(struct net_device *dev, char *newname) err = dev_alloc_name(dev, newname); if (err < 0) return err; - strcpy(newname, dev->name); } else if (__dev_get_by_name(net, newname)) return -EEXIST; @@ -954,6 +954,38 @@ rollback: } /** + * dev_set_alias - change ifalias of a device + * @dev: device + * @alias: name up to IFALIASZ + * @len: limit of bytes to copy from info + * + * Set ifalias for a device, + */ +int dev_set_alias(struct net_device *dev, const char *alias, size_t len) +{ + ASSERT_RTNL(); + + if (len >= IFALIASZ) + return -EINVAL; + + if (!len) { + if (dev->ifalias) { + kfree(dev->ifalias); + dev->ifalias = NULL; + } + return 0; + } + + dev->ifalias = krealloc(dev->ifalias, len+1, GFP_KERNEL); + if (!dev->ifalias) + return -ENOMEM; + + strlcpy(dev->ifalias, alias, len+1); + return len; +} + + +/** * netdev_features_change - device changes features * @dev: device to cause notification * @@ -1667,7 +1699,7 @@ static u16 simple_tx_hash(struct net_device *dev, struct sk_buff *skb) { u32 addr1, addr2, ports; u32 hash, ihl; - u8 ip_proto; + u8 ip_proto = 0; if (unlikely(!simple_tx_hashrnd_initialized)) { get_random_bytes(&simple_tx_hashrnd, 4); @@ -1675,13 +1707,14 @@ static u16 simple_tx_hash(struct net_device *dev, struct sk_buff *skb) } switch (skb->protocol) { - case __constant_htons(ETH_P_IP): - ip_proto = ip_hdr(skb)->protocol; + case htons(ETH_P_IP): + if (!(ip_hdr(skb)->frag_off & htons(IP_MF | IP_OFFSET))) + ip_proto = ip_hdr(skb)->protocol; addr1 = ip_hdr(skb)->saddr; addr2 = ip_hdr(skb)->daddr; ihl = ip_hdr(skb)->ihl; break; - case __constant_htons(ETH_P_IPV6): + case htons(ETH_P_IPV6): ip_proto = ipv6_hdr(skb)->nexthdr; addr1 = ipv6_hdr(skb)->saddr.s6_addr32[3]; addr2 = ipv6_hdr(skb)->daddr.s6_addr32[3]; @@ -1991,8 +2024,13 @@ static void net_tx_action(struct softirq_action *h) spin_unlock(root_lock); } else { if (!test_bit(__QDISC_STATE_DEACTIVATED, - &q->state)) + &q->state)) { __netif_reschedule(q); + } else { + smp_mb__before_clear_bit(); + clear_bit(__QDISC_STATE_SCHED, + &q->state); + } } } } @@ -2911,6 +2949,12 @@ int netdev_set_master(struct net_device *slave, struct net_device *master) return 0; } +static void dev_change_rx_flags(struct net_device *dev, int flags) +{ + if (dev->flags & IFF_UP && dev->change_rx_flags) + dev->change_rx_flags(dev, flags); +} + static int __dev_set_promiscuity(struct net_device *dev, int inc) { unsigned short old_flags = dev->flags; @@ -2948,8 +2992,7 @@ static int __dev_set_promiscuity(struct net_device *dev, int inc) current->uid, current->gid, audit_get_sessionid(current)); - if (dev->change_rx_flags) - dev->change_rx_flags(dev, IFF_PROMISC); + dev_change_rx_flags(dev, IFF_PROMISC); } return 0; } @@ -3015,8 +3058,7 @@ int dev_set_allmulti(struct net_device *dev, int inc) } } if (dev->flags ^ old_flags) { - if (dev->change_rx_flags) - dev->change_rx_flags(dev, IFF_ALLMULTI); + dev_change_rx_flags(dev, IFF_ALLMULTI); dev_set_rx_mode(dev); } return 0; @@ -3295,6 +3337,12 @@ static void dev_addr_discard(struct net_device *dev) netif_addr_unlock_bh(dev); } +/** + * dev_get_flags - get flags reported to userspace + * @dev: device + * + * Get the combination of flag bits exported through APIs to userspace. + */ unsigned dev_get_flags(const struct net_device *dev) { unsigned flags; @@ -3319,6 +3367,14 @@ unsigned dev_get_flags(const struct net_device *dev) return flags; } +/** + * dev_change_flags - change device settings + * @dev: device + * @flags: device state flags + * + * Change settings on device based state flags. The flags are + * in the userspace exported format. + */ int dev_change_flags(struct net_device *dev, unsigned flags) { int ret, changes; @@ -3340,8 +3396,8 @@ int dev_change_flags(struct net_device *dev, unsigned flags) * Load in the correct multicast list now the flags have changed. */ - if (dev->change_rx_flags && (old_flags ^ flags) & IFF_MULTICAST) - dev->change_rx_flags(dev, IFF_MULTICAST); + if ((old_flags ^ flags) & IFF_MULTICAST) + dev_change_rx_flags(dev, IFF_MULTICAST); dev_set_rx_mode(dev); @@ -3388,6 +3444,13 @@ int dev_change_flags(struct net_device *dev, unsigned flags) return ret; } +/** + * dev_set_mtu - Change maximum transfer unit + * @dev: device + * @new_mtu: new transfer unit + * + * Change the maximum transfer size of the network device. + */ int dev_set_mtu(struct net_device *dev, int new_mtu) { int err; @@ -3412,6 +3475,13 @@ int dev_set_mtu(struct net_device *dev, int new_mtu) return err; } +/** + * dev_set_mac_address - Change Media Access Control Address + * @dev: device + * @sa: new address + * + * Change the hardware (MAC) address of the device + */ int dev_set_mac_address(struct net_device *dev, struct sockaddr *sa) { int err; @@ -3801,14 +3871,11 @@ static int dev_new_index(struct net *net) } /* Delayed registration/unregisteration */ -static DEFINE_SPINLOCK(net_todo_list_lock); static LIST_HEAD(net_todo_list); static void net_set_todo(struct net_device *dev) { - spin_lock(&net_todo_list_lock); list_add_tail(&dev->todo_list, &net_todo_list); - spin_unlock(&net_todo_list_lock); } static void rollback_registered(struct net_device *dev) @@ -4135,33 +4202,24 @@ static void netdev_wait_allrefs(struct net_device *dev) * free_netdev(y1); * free_netdev(y2); * - * We are invoked by rtnl_unlock() after it drops the semaphore. + * We are invoked by rtnl_unlock(). * This allows us to deal with problems: * 1) We can delete sysfs objects which invoke hotplug * without deadlocking with linkwatch via keventd. * 2) Since we run with the RTNL semaphore not held, we can sleep * safely in order to wait for the netdev refcnt to drop to zero. + * + * We must not return until all unregister events added during + * the interval the lock was held have been completed. */ -static DEFINE_MUTEX(net_todo_run_mutex); void netdev_run_todo(void) { struct list_head list; - /* Need to guard against multiple cpu's getting out of order. */ - mutex_lock(&net_todo_run_mutex); - - /* Not safe to do outside the semaphore. We must not return - * until all unregister events invoked by the local processor - * have been completed (either by this todo run, or one on - * another cpu). - */ - if (list_empty(&net_todo_list)) - goto out; - /* Snapshot list, allow later requests */ - spin_lock(&net_todo_list_lock); list_replace_init(&net_todo_list, &list); - spin_unlock(&net_todo_list_lock); + + __rtnl_unlock(); while (!list_empty(&list)) { struct net_device *dev @@ -4193,9 +4251,6 @@ void netdev_run_todo(void) /* Free network device */ kobject_put(&dev->dev.kobj); } - -out: - mutex_unlock(&net_todo_run_mutex); } static struct net_device_stats *internal_stats(struct net_device *dev) @@ -4315,7 +4370,12 @@ void free_netdev(struct net_device *dev) put_device(&dev->dev); } -/* Synchronize with packet receive processing. */ +/** + * synchronize_net - Synchronize with packet receive processing + * + * Wait for packets currently being received to be done. + * Does not block later packets from starting. + */ void synchronize_net(void) { might_sleep(); @@ -4617,7 +4677,7 @@ netdev_dma_event(struct dma_client *client, struct dma_chan *chan, } /** - * netdev_dma_regiser - register the networking subsystem as a DMA client + * netdev_dma_register - register the networking subsystem as a DMA client */ static int __init netdev_dma_register(void) { @@ -4663,6 +4723,12 @@ int netdev_compute_features(unsigned long all, unsigned long one) one |= NETIF_F_GSO_SOFTWARE; one |= NETIF_F_GSO; + /* + * If even one device supports a GSO protocol with software fallback, + * enable it for all. + */ + all |= one & NETIF_F_GSO_SOFTWARE; + /* If even one device supports robust GSO, enable it for all. */ if (one & NETIF_F_GSO_ROBUST) all |= NETIF_F_GSO_ROBUST; @@ -4712,10 +4778,18 @@ err_name: return -ENOMEM; } -char *netdev_drivername(struct net_device *dev, char *buffer, int len) +/** + * netdev_drivername - network driver for the device + * @dev: network device + * @buffer: buffer for resulting name + * @len: size of buffer + * + * Determine network driver for device. + */ +char *netdev_drivername(const struct net_device *dev, char *buffer, int len) { - struct device_driver *driver; - struct device *parent; + const struct device_driver *driver; + const struct device *parent; if (len <= 0 || !buffer) return buffer; diff --git a/net/core/dev_mcast.c b/net/core/dev_mcast.c index 5402b3b38e0d..9e2fa39f22a3 100644 --- a/net/core/dev_mcast.c +++ b/net/core/dev_mcast.c @@ -6,7 +6,7 @@ * Richard Underwood <richard@wuzz.demon.co.uk> * * Stir fried together from the IP multicast and CAP patches above - * Alan Cox <Alan.Cox@linux.org> + * Alan Cox <alan@lxorguk.ukuu.org.uk> * * Fixes: * Alan Cox : Update the device on a real delete diff --git a/net/core/dst.c b/net/core/dst.c index fe03266130b6..09c1530f4681 100644 --- a/net/core/dst.c +++ b/net/core/dst.c @@ -203,6 +203,7 @@ void __dst_free(struct dst_entry * dst) if (dst_garbage.timer_inc > DST_GC_INC) { dst_garbage.timer_inc = DST_GC_INC; dst_garbage.timer_expires = DST_GC_MIN; + cancel_delayed_work(&dst_gc_work); schedule_delayed_work(&dst_gc_work, dst_garbage.timer_expires); } spin_unlock_bh(&dst_garbage.lock); diff --git a/net/core/neighbour.c b/net/core/neighbour.c index 9d92e41826e7..1dc728b38589 100644 --- a/net/core/neighbour.c +++ b/net/core/neighbour.c @@ -927,8 +927,7 @@ int __neigh_event_send(struct neighbour *neigh, struct sk_buff *skb) if (skb_queue_len(&neigh->arp_queue) >= neigh->parms->queue_len) { struct sk_buff *buff; - buff = neigh->arp_queue.next; - __skb_unlink(buff, &neigh->arp_queue); + buff = __skb_dequeue(&neigh->arp_queue); kfree_skb(buff); NEIGH_CACHE_STAT_INC(neigh->tbl, unres_discards); } @@ -1259,24 +1258,20 @@ static void neigh_proxy_process(unsigned long arg) struct neigh_table *tbl = (struct neigh_table *)arg; long sched_next = 0; unsigned long now = jiffies; - struct sk_buff *skb; + struct sk_buff *skb, *n; spin_lock(&tbl->proxy_queue.lock); - skb = tbl->proxy_queue.next; - - while (skb != (struct sk_buff *)&tbl->proxy_queue) { - struct sk_buff *back = skb; - long tdif = NEIGH_CB(back)->sched_next - now; + skb_queue_walk_safe(&tbl->proxy_queue, skb, n) { + long tdif = NEIGH_CB(skb)->sched_next - now; - skb = skb->next; if (tdif <= 0) { - struct net_device *dev = back->dev; - __skb_unlink(back, &tbl->proxy_queue); + struct net_device *dev = skb->dev; + __skb_unlink(skb, &tbl->proxy_queue); if (tbl->proxy_redo && netif_running(dev)) - tbl->proxy_redo(back); + tbl->proxy_redo(skb); else - kfree_skb(back); + kfree_skb(skb); dev_put(dev); } else if (!sched_next || tdif < sched_next) diff --git a/net/core/net-sysfs.c b/net/core/net-sysfs.c index c1f4e0d428c0..92d6b9467314 100644 --- a/net/core/net-sysfs.c +++ b/net/core/net-sysfs.c @@ -209,9 +209,44 @@ static ssize_t store_tx_queue_len(struct device *dev, return netdev_store(dev, attr, buf, len, change_tx_queue_len); } +static ssize_t store_ifalias(struct device *dev, struct device_attribute *attr, + const char *buf, size_t len) +{ + struct net_device *netdev = to_net_dev(dev); + size_t count = len; + ssize_t ret; + + if (!capable(CAP_NET_ADMIN)) + return -EPERM; + + /* ignore trailing newline */ + if (len > 0 && buf[len - 1] == '\n') + --count; + + rtnl_lock(); + ret = dev_set_alias(netdev, buf, count); + rtnl_unlock(); + + return ret < 0 ? ret : len; +} + +static ssize_t show_ifalias(struct device *dev, + struct device_attribute *attr, char *buf) +{ + const struct net_device *netdev = to_net_dev(dev); + ssize_t ret = 0; + + rtnl_lock(); + if (netdev->ifalias) + ret = sprintf(buf, "%s\n", netdev->ifalias); + rtnl_unlock(); + return ret; +} + static struct device_attribute net_class_attributes[] = { __ATTR(addr_len, S_IRUGO, show_addr_len, NULL), __ATTR(dev_id, S_IRUGO, show_dev_id, NULL), + __ATTR(ifalias, S_IRUGO | S_IWUSR, show_ifalias, store_ifalias), __ATTR(iflink, S_IRUGO, show_iflink, NULL), __ATTR(ifindex, S_IRUGO, show_ifindex, NULL), __ATTR(features, S_IRUGO, show_features, NULL), @@ -418,6 +453,7 @@ static void netdev_release(struct device *d) BUG_ON(dev->reg_state != NETREG_RELEASED); + kfree(dev->ifalias); kfree((char *)dev - dev->padded); } diff --git a/net/core/net_namespace.c b/net/core/net_namespace.c index 7c52fe277b62..f1d07b5c1e17 100644 --- a/net/core/net_namespace.c +++ b/net/core/net_namespace.c @@ -18,6 +18,7 @@ static struct list_head *first_device = &pernet_list; static DEFINE_MUTEX(net_mutex); LIST_HEAD(net_namespace_list); +EXPORT_SYMBOL_GPL(net_namespace_list); struct net init_net; EXPORT_SYMBOL(init_net); @@ -95,7 +96,7 @@ static void net_free(struct net *net) return; } #endif - + kfree(net->gen); kmem_cache_free(net_cachep, net); } diff --git a/net/core/pktgen.c b/net/core/pktgen.c index a756847e3814..99f656d35b4f 100644 --- a/net/core/pktgen.c +++ b/net/core/pktgen.c @@ -2474,7 +2474,7 @@ static inline int process_ipsec(struct pktgen_dev *pkt_dev, if (ret < 0) { printk(KERN_ERR "Error expanding " "ipsec packet %d\n",ret); - return 0; + goto err; } } @@ -2484,8 +2484,7 @@ static inline int process_ipsec(struct pktgen_dev *pkt_dev, if (ret) { printk(KERN_ERR "Error creating ipsec " "packet %d\n",ret); - kfree_skb(skb); - return 0; + goto err; } /* restore ll */ eth = (__u8 *) skb_push(skb, ETH_HLEN); @@ -2494,6 +2493,9 @@ static inline int process_ipsec(struct pktgen_dev *pkt_dev, } } return 1; +err: + kfree_skb(skb); + return 0; } #endif diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index 71edb8b36341..3630131fa1fa 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -73,7 +73,7 @@ void __rtnl_unlock(void) void rtnl_unlock(void) { - mutex_unlock(&rtnl_mutex); + /* This fellow will unlock it for us. */ netdev_run_todo(); } @@ -586,6 +586,7 @@ static inline size_t if_nlmsg_size(const struct net_device *dev) { return NLMSG_ALIGN(sizeof(struct ifinfomsg)) + nla_total_size(IFNAMSIZ) /* IFLA_IFNAME */ + + nla_total_size(IFALIASZ) /* IFLA_IFALIAS */ + nla_total_size(IFNAMSIZ) /* IFLA_QDISC */ + nla_total_size(sizeof(struct rtnl_link_ifmap)) + nla_total_size(sizeof(struct rtnl_link_stats)) @@ -640,6 +641,9 @@ static int rtnl_fill_ifinfo(struct sk_buff *skb, struct net_device *dev, if (txq->qdisc_sleeping) NLA_PUT_STRING(skb, IFLA_QDISC, txq->qdisc_sleeping->ops->id); + if (dev->ifalias) + NLA_PUT_STRING(skb, IFLA_IFALIAS, dev->ifalias); + if (1) { struct rtnl_link_ifmap map = { .mem_start = dev->mem_start, @@ -713,6 +717,7 @@ const struct nla_policy ifla_policy[IFLA_MAX+1] = { [IFLA_LINKMODE] = { .type = NLA_U8 }, [IFLA_LINKINFO] = { .type = NLA_NESTED }, [IFLA_NET_NS_PID] = { .type = NLA_U32 }, + [IFLA_IFALIAS] = { .type = NLA_STRING, .len = IFALIASZ-1 }, }; static const struct nla_policy ifla_info_policy[IFLA_INFO_MAX+1] = { @@ -853,6 +858,14 @@ static int do_setlink(struct net_device *dev, struct ifinfomsg *ifm, modified = 1; } + if (tb[IFLA_IFALIAS]) { + err = dev_set_alias(dev, nla_data(tb[IFLA_IFALIAS]), + nla_len(tb[IFLA_IFALIAS])); + if (err < 0) + goto errout; + modified = 1; + } + if (tb[IFLA_BROADCAST]) { nla_memcpy(dev->broadcast, tb[IFLA_BROADCAST], dev->addr_len); send_addr_notify = 1; diff --git a/net/core/skb_dma_map.c b/net/core/skb_dma_map.c new file mode 100644 index 000000000000..86234923a3b7 --- /dev/null +++ b/net/core/skb_dma_map.c @@ -0,0 +1,66 @@ +/* skb_dma_map.c: DMA mapping helpers for socket buffers. + * + * Copyright (C) David S. Miller <davem@davemloft.net> + */ + +#include <linux/kernel.h> +#include <linux/module.h> +#include <linux/dma-mapping.h> +#include <linux/skbuff.h> + +int skb_dma_map(struct device *dev, struct sk_buff *skb, + enum dma_data_direction dir) +{ + struct skb_shared_info *sp = skb_shinfo(skb); + dma_addr_t map; + int i; + + map = dma_map_single(dev, skb->data, + skb_headlen(skb), dir); + if (dma_mapping_error(dev, map)) + goto out_err; + + sp->dma_maps[0] = map; + for (i = 0; i < sp->nr_frags; i++) { + skb_frag_t *fp = &sp->frags[i]; + + map = dma_map_page(dev, fp->page, fp->page_offset, + fp->size, dir); + if (dma_mapping_error(dev, map)) + goto unwind; + sp->dma_maps[i + 1] = map; + } + sp->num_dma_maps = i + 1; + + return 0; + +unwind: + while (--i >= 0) { + skb_frag_t *fp = &sp->frags[i]; + + dma_unmap_page(dev, sp->dma_maps[i + 1], + fp->size, dir); + } + dma_unmap_single(dev, sp->dma_maps[0], + skb_headlen(skb), dir); +out_err: + return -ENOMEM; +} +EXPORT_SYMBOL(skb_dma_map); + +void skb_dma_unmap(struct device *dev, struct sk_buff *skb, + enum dma_data_direction dir) +{ + struct skb_shared_info *sp = skb_shinfo(skb); + int i; + + dma_unmap_single(dev, sp->dma_maps[0], + skb_headlen(skb), dir); + for (i = 0; i < sp->nr_frags; i++) { + skb_frag_t *fp = &sp->frags[i]; + + dma_unmap_page(dev, sp->dma_maps[i + 1], + fp->size, dir); + } +} +EXPORT_SYMBOL(skb_dma_unmap); diff --git a/net/core/skbuff.c b/net/core/skbuff.c index ca1ccdf1ef76..4e22e3a35359 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -1,7 +1,7 @@ /* * Routines having to do with the 'struct sk_buff' memory handlers. * - * Authors: Alan Cox <iiitac@pyr.swan.ac.uk> + * Authors: Alan Cox <alan@lxorguk.ukuu.org.uk> * Florian La Roche <rzsfl@rz.uni-sb.de> * * Fixes: @@ -263,6 +263,26 @@ struct sk_buff *__netdev_alloc_skb(struct net_device *dev, return skb; } +struct page *__netdev_alloc_page(struct net_device *dev, gfp_t gfp_mask) +{ + int node = dev->dev.parent ? dev_to_node(dev->dev.parent) : -1; + struct page *page; + + page = alloc_pages_node(node, gfp_mask, 0); + return page; +} +EXPORT_SYMBOL(__netdev_alloc_page); + +void skb_add_rx_frag(struct sk_buff *skb, int i, struct page *page, int off, + int size) +{ + skb_fill_page_desc(skb, i, page, off, size); + skb->len += size; + skb->data_len += size; + skb->truesize += size; +} +EXPORT_SYMBOL(skb_add_rx_frag); + /** * dev_alloc_skb - allocate an skbuff for receiving * @length: length to allocate @@ -363,8 +383,7 @@ static void kfree_skbmem(struct sk_buff *skb) } } -/* Free everything but the sk_buff shell. */ -static void skb_release_all(struct sk_buff *skb) +static void skb_release_head_state(struct sk_buff *skb) { dst_release(skb->dst); #ifdef CONFIG_XFRM @@ -388,6 +407,12 @@ static void skb_release_all(struct sk_buff *skb) skb->tc_verd = 0; #endif #endif +} + +/* Free everything but the sk_buff shell. */ +static void skb_release_all(struct sk_buff *skb) +{ + skb_release_head_state(skb); skb_release_data(skb); } @@ -424,6 +449,38 @@ void kfree_skb(struct sk_buff *skb) __kfree_skb(skb); } +int skb_recycle_check(struct sk_buff *skb, int skb_size) +{ + struct skb_shared_info *shinfo; + + if (skb_is_nonlinear(skb) || skb->fclone != SKB_FCLONE_UNAVAILABLE) + return 0; + + skb_size = SKB_DATA_ALIGN(skb_size + NET_SKB_PAD); + if (skb_end_pointer(skb) - skb->head < skb_size) + return 0; + + if (skb_shared(skb) || skb_cloned(skb)) + return 0; + + skb_release_head_state(skb); + shinfo = skb_shinfo(skb); + atomic_set(&shinfo->dataref, 1); + shinfo->nr_frags = 0; + shinfo->gso_size = 0; + shinfo->gso_segs = 0; + shinfo->gso_type = 0; + shinfo->ip6_frag_id = 0; + shinfo->frag_list = NULL; + + memset(skb, 0, offsetof(struct sk_buff, tail)); + skb_reset_tail_pointer(skb); + skb->data = skb->head + NET_SKB_PAD; + + return 1; +} +EXPORT_SYMBOL(skb_recycle_check); + static void __copy_skb_header(struct sk_buff *new, const struct sk_buff *old) { new->tstamp = old->tstamp; @@ -701,6 +758,8 @@ int pskb_expand_head(struct sk_buff *skb, int nhead, int ntail, #endif long off; + BUG_ON(nhead < 0); + if (skb_shared(skb)) BUG(); diff --git a/net/core/sock.c b/net/core/sock.c index 91f8bbc93526..5e2a3132a8c9 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -154,7 +154,8 @@ static const char *af_family_key_strings[AF_MAX+1] = { "sk_lock-AF_PPPOX" , "sk_lock-AF_WANPIPE" , "sk_lock-AF_LLC" , "sk_lock-27" , "sk_lock-28" , "sk_lock-AF_CAN" , "sk_lock-AF_TIPC" , "sk_lock-AF_BLUETOOTH", "sk_lock-IUCV" , - "sk_lock-AF_RXRPC" , "sk_lock-AF_MAX" + "sk_lock-AF_RXRPC" , "sk_lock-AF_ISDN" , "sk_lock-AF_PHONET" , + "sk_lock-AF_MAX" }; static const char *af_family_slock_key_strings[AF_MAX+1] = { "slock-AF_UNSPEC", "slock-AF_UNIX" , "slock-AF_INET" , @@ -168,7 +169,8 @@ static const char *af_family_slock_key_strings[AF_MAX+1] = { "slock-AF_PPPOX" , "slock-AF_WANPIPE" , "slock-AF_LLC" , "slock-27" , "slock-28" , "slock-AF_CAN" , "slock-AF_TIPC" , "slock-AF_BLUETOOTH", "slock-AF_IUCV" , - "slock-AF_RXRPC" , "slock-AF_MAX" + "slock-AF_RXRPC" , "slock-AF_ISDN" , "slock-AF_PHONET" , + "slock-AF_MAX" }; static const char *af_family_clock_key_strings[AF_MAX+1] = { "clock-AF_UNSPEC", "clock-AF_UNIX" , "clock-AF_INET" , @@ -182,7 +184,8 @@ static const char *af_family_clock_key_strings[AF_MAX+1] = { "clock-AF_PPPOX" , "clock-AF_WANPIPE" , "clock-AF_LLC" , "clock-27" , "clock-28" , "clock-AF_CAN" , "clock-AF_TIPC" , "clock-AF_BLUETOOTH", "clock-AF_IUCV" , - "clock-AF_RXRPC" , "clock-AF_MAX" + "clock-AF_RXRPC" , "clock-AF_ISDN" , "clock-AF_PHONET" , + "clock-AF_MAX" }; #endif @@ -324,7 +327,7 @@ int sk_receive_skb(struct sock *sk, struct sk_buff *skb, const int nested) */ mutex_acquire(&sk->sk_lock.dep_map, 0, 1, _RET_IP_); - rc = sk->sk_backlog_rcv(sk, skb); + rc = sk_backlog_rcv(sk, skb); mutex_release(&sk->sk_lock.dep_map, 1, _RET_IP_); } else @@ -1371,7 +1374,7 @@ static void __release_sock(struct sock *sk) struct sk_buff *next = skb->next; skb->next = NULL; - sk->sk_backlog_rcv(sk, skb); + sk_backlog_rcv(sk, skb); /* * We are in process context here with softirqs diff --git a/net/core/stream.c b/net/core/stream.c index a6b3437ff082..8727cead64ad 100644 --- a/net/core/stream.c +++ b/net/core/stream.c @@ -9,7 +9,7 @@ * * Authors: Arnaldo Carvalho de Melo <acme@conectiva.com.br> * (from old tcp.c code) - * Alan Cox <alan@redhat.com> (Borrowed comments 8-)) + * Alan Cox <alan@lxorguk.ukuu.org.uk> (Borrowed comments 8-)) */ #include <linux/module.h> diff --git a/net/dccp/ccids/ccid2.c b/net/dccp/ccids/ccid2.c index 8e9580874216..9a430734530c 100644 --- a/net/dccp/ccids/ccid2.c +++ b/net/dccp/ccids/ccid2.c @@ -783,7 +783,7 @@ static struct ccid_operations ccid2 = { }; #ifdef CONFIG_IP_DCCP_CCID2_DEBUG -module_param(ccid2_debug, bool, 0444); +module_param(ccid2_debug, bool, 0644); MODULE_PARM_DESC(ccid2_debug, "Enable debug messages"); #endif diff --git a/net/dccp/ccids/ccid3.c b/net/dccp/ccids/ccid3.c index f6756e0c9e69..3b8bd7ca6761 100644 --- a/net/dccp/ccids/ccid3.c +++ b/net/dccp/ccids/ccid3.c @@ -963,7 +963,7 @@ static struct ccid_operations ccid3 = { }; #ifdef CONFIG_IP_DCCP_CCID3_DEBUG -module_param(ccid3_debug, bool, 0444); +module_param(ccid3_debug, bool, 0644); MODULE_PARM_DESC(ccid3_debug, "Enable debug messages"); #endif diff --git a/net/dccp/ccids/lib/loss_interval.c b/net/dccp/ccids/lib/loss_interval.c index bcd6ac415bb9..5b3ce0688c5c 100644 --- a/net/dccp/ccids/lib/loss_interval.c +++ b/net/dccp/ccids/lib/loss_interval.c @@ -67,7 +67,10 @@ static void tfrc_lh_calc_i_mean(struct tfrc_loss_hist *lh) u32 i_i, i_tot0 = 0, i_tot1 = 0, w_tot = 0; int i, k = tfrc_lh_length(lh) - 1; /* k is as in rfc3448bis, 5.4 */ - for (i=0; i <= k; i++) { + if (k <= 0) + return; + + for (i = 0; i <= k; i++) { i_i = tfrc_lh_get_interval(lh, i); if (i < k) { @@ -78,7 +81,6 @@ static void tfrc_lh_calc_i_mean(struct tfrc_loss_hist *lh) i_tot1 += i_i * tfrc_lh_weights[i-1]; } - BUG_ON(w_tot == 0); lh->i_mean = max(i_tot0, i_tot1) / w_tot; } diff --git a/net/dccp/ccids/lib/tfrc.c b/net/dccp/ccids/lib/tfrc.c index 97ecec0a8e76..185916218e07 100644 --- a/net/dccp/ccids/lib/tfrc.c +++ b/net/dccp/ccids/lib/tfrc.c @@ -10,7 +10,7 @@ #ifdef CONFIG_IP_DCCP_TFRC_DEBUG int tfrc_debug; -module_param(tfrc_debug, bool, 0444); +module_param(tfrc_debug, bool, 0644); MODULE_PARM_DESC(tfrc_debug, "Enable debug messages"); #endif diff --git a/net/dccp/input.c b/net/dccp/input.c index 803933ab396d..779d0ed9ae94 100644 --- a/net/dccp/input.c +++ b/net/dccp/input.c @@ -370,7 +370,7 @@ int dccp_rcv_established(struct sock *sk, struct sk_buff *skb, goto discard; if (dccp_parse_options(sk, NULL, skb)) - goto discard; + return 1; if (DCCP_SKB_CB(skb)->dccpd_ack_seq != DCCP_PKT_WITHOUT_ACK_SEQ) dccp_event_ack_recv(sk, skb); @@ -610,7 +610,7 @@ int dccp_rcv_state_process(struct sock *sk, struct sk_buff *skb, * Step 8: Process options and mark acknowledgeable */ if (dccp_parse_options(sk, NULL, skb)) - goto discard; + return 1; if (dcb->dccpd_ack_seq != DCCP_PKT_WITHOUT_ACK_SEQ) dccp_event_ack_recv(sk, skb); diff --git a/net/dccp/ipv4.c b/net/dccp/ipv4.c index 882c5c4de69e..e3dfddab21cc 100644 --- a/net/dccp/ipv4.c +++ b/net/dccp/ipv4.c @@ -811,9 +811,8 @@ static int dccp_v4_rcv(struct sk_buff *skb) /* Step 2: * Look up flow ID in table and get corresponding socket */ - sk = __inet_lookup(dev_net(skb->dst->dev), &dccp_hashinfo, - iph->saddr, dh->dccph_sport, - iph->daddr, dh->dccph_dport, inet_iif(skb)); + sk = __inet_lookup_skb(&dccp_hashinfo, skb, + dh->dccph_sport, dh->dccph_dport); /* * Step 2: * If no socket ... diff --git a/net/dccp/ipv6.c b/net/dccp/ipv6.c index 5e1ee0da2c40..11062780bb02 100644 --- a/net/dccp/ipv6.c +++ b/net/dccp/ipv6.c @@ -98,7 +98,8 @@ static void dccp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, if (skb->len < offset + sizeof(*dh) || skb->len < offset + __dccp_basic_hdr_len(dh)) { - ICMP6_INC_STATS_BH(__in6_dev_get(skb->dev), ICMP6_MIB_INERRORS); + ICMP6_INC_STATS_BH(net, __in6_dev_get(skb->dev), + ICMP6_MIB_INERRORS); return; } @@ -107,7 +108,8 @@ static void dccp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, &hdr->saddr, dh->dccph_sport, inet6_iif(skb)); if (sk == NULL) { - ICMP6_INC_STATS_BH(__in6_dev_get(skb->dev), ICMP6_MIB_INERRORS); + ICMP6_INC_STATS_BH(net, __in6_dev_get(skb->dev), + ICMP6_MIB_INERRORS); return; } @@ -805,10 +807,8 @@ static int dccp_v6_rcv(struct sk_buff *skb) /* Step 2: * Look up flow ID in table and get corresponding socket */ - sk = __inet6_lookup(dev_net(skb->dst->dev), &dccp_hashinfo, - &ipv6_hdr(skb)->saddr, dh->dccph_sport, - &ipv6_hdr(skb)->daddr, ntohs(dh->dccph_dport), - inet6_iif(skb)); + sk = __inet6_lookup_skb(&dccp_hashinfo, skb, + dh->dccph_sport, dh->dccph_dport); /* * Step 2: * If no socket ... diff --git a/net/dccp/options.c b/net/dccp/options.c index dc7c158a2f4b..0809b63cb055 100644 --- a/net/dccp/options.c +++ b/net/dccp/options.c @@ -81,11 +81,11 @@ int dccp_parse_options(struct sock *sk, struct dccp_request_sock *dreq, /* Check if this isn't a single byte option */ if (opt > DCCPO_MAX_RESERVED) { if (opt_ptr == opt_end) - goto out_invalid_option; + goto out_nonsensical_length; len = *opt_ptr++; - if (len < 3) - goto out_invalid_option; + if (len < 2) + goto out_nonsensical_length; /* * Remove the type and len fields, leaving * just the value size @@ -95,7 +95,7 @@ int dccp_parse_options(struct sock *sk, struct dccp_request_sock *dreq, opt_ptr += len; if (opt_ptr > opt_end) - goto out_invalid_option; + goto out_nonsensical_length; } /* @@ -283,12 +283,17 @@ ignore_option: if (mandatory) goto out_invalid_option; +out_nonsensical_length: + /* RFC 4340, 5.8: ignore option and all remaining option space */ return 0; out_invalid_option: DCCP_INC_STATS_BH(DCCP_MIB_INVALIDOPT); DCCP_SKB_CB(skb)->dccpd_reset_code = DCCP_RESET_CODE_OPTION_ERROR; DCCP_WARN("DCCP(%p): invalid option %d, len=%d", sk, opt, len); + DCCP_SKB_CB(skb)->dccpd_reset_data[0] = opt; + DCCP_SKB_CB(skb)->dccpd_reset_data[1] = len > 0 ? value[0] : 0; + DCCP_SKB_CB(skb)->dccpd_reset_data[2] = len > 1 ? value[1] : 0; return -1; } diff --git a/net/dccp/proto.c b/net/dccp/proto.c index 1ca3b26eed0f..d0bd34819761 100644 --- a/net/dccp/proto.c +++ b/net/dccp/proto.c @@ -309,7 +309,9 @@ int dccp_disconnect(struct sock *sk, int flags) sk->sk_err = ECONNRESET; dccp_clear_xmit_timers(sk); + __skb_queue_purge(&sk->sk_receive_queue); + __skb_queue_purge(&sk->sk_write_queue); if (sk->sk_send_head != NULL) { __kfree_skb(sk->sk_send_head); sk->sk_send_head = NULL; @@ -1028,7 +1030,7 @@ MODULE_PARM_DESC(thash_entries, "Number of ehash buckets"); #ifdef CONFIG_IP_DCCP_DEBUG int dccp_debug; -module_param(dccp_debug, bool, 0444); +module_param(dccp_debug, bool, 0644); MODULE_PARM_DESC(dccp_debug, "Enable debug messages"); EXPORT_SYMBOL_GPL(dccp_debug); diff --git a/net/decnet/dn_dev.c b/net/decnet/dn_dev.c index 2f0ac3c3eb71..8008c8613027 100644 --- a/net/decnet/dn_dev.c +++ b/net/decnet/dn_dev.c @@ -152,7 +152,7 @@ static struct dn_dev_parms dn_dev_list[] = { #define DN_DEV_LIST_SIZE ARRAY_SIZE(dn_dev_list) -#define DN_DEV_PARMS_OFFSET(x) ((int) ((char *) &((struct dn_dev_parms *)0)->x)) +#define DN_DEV_PARMS_OFFSET(x) offsetof(struct dn_dev_parms, x) #ifdef CONFIG_SYSCTL @@ -166,7 +166,7 @@ static int max_priority[] = { 127 }; /* From DECnet spec */ static int dn_forwarding_proc(ctl_table *, int, struct file *, void __user *, size_t *, loff_t *); -static int dn_forwarding_sysctl(ctl_table *table, int __user *name, int nlen, +static int dn_forwarding_sysctl(ctl_table *table, void __user *oldval, size_t __user *oldlenp, void __user *newval, size_t newlen); @@ -318,7 +318,7 @@ static int dn_forwarding_proc(ctl_table *table, int write, #endif } -static int dn_forwarding_sysctl(ctl_table *table, int __user *name, int nlen, +static int dn_forwarding_sysctl(ctl_table *table, void __user *oldval, size_t __user *oldlenp, void __user *newval, size_t newlen) { diff --git a/net/decnet/sysctl_net_decnet.c b/net/decnet/sysctl_net_decnet.c index 228067c571ba..36400b266896 100644 --- a/net/decnet/sysctl_net_decnet.c +++ b/net/decnet/sysctl_net_decnet.c @@ -132,7 +132,7 @@ static int parse_addr(__le16 *addr, char *str) } -static int dn_node_address_strategy(ctl_table *table, int __user *name, int nlen, +static int dn_node_address_strategy(ctl_table *table, void __user *oldval, size_t __user *oldlenp, void __user *newval, size_t newlen) { @@ -217,7 +217,7 @@ static int dn_node_address_handler(ctl_table *table, int write, } -static int dn_def_dev_strategy(ctl_table *table, int __user *name, int nlen, +static int dn_def_dev_strategy(ctl_table *table, void __user *oldval, size_t __user *oldlenp, void __user *newval, size_t newlen) { diff --git a/net/dsa/Kconfig b/net/dsa/Kconfig new file mode 100644 index 000000000000..49211b35725b --- /dev/null +++ b/net/dsa/Kconfig @@ -0,0 +1,60 @@ +menuconfig NET_DSA + bool "Distributed Switch Architecture support" + default n + depends on EXPERIMENTAL && !S390 + select PHYLIB + ---help--- + This allows you to use hardware switch chips that use + the Distributed Switch Architecture. + + +if NET_DSA + +# tagging formats +config NET_DSA_TAG_DSA + bool + default n + +config NET_DSA_TAG_EDSA + bool + default n + +config NET_DSA_TAG_TRAILER + bool + default n + + +# switch drivers +config NET_DSA_MV88E6XXX + bool + default n + +config NET_DSA_MV88E6060 + bool "Marvell 88E6060 ethernet switch chip support" + select NET_DSA_TAG_TRAILER + ---help--- + This enables support for the Marvell 88E6060 ethernet switch + chip. + +config NET_DSA_MV88E6XXX_NEED_PPU + bool + default n + +config NET_DSA_MV88E6131 + bool "Marvell 88E6131 ethernet switch chip support" + select NET_DSA_MV88E6XXX + select NET_DSA_MV88E6XXX_NEED_PPU + select NET_DSA_TAG_DSA + ---help--- + This enables support for the Marvell 88E6131 ethernet switch + chip. + +config NET_DSA_MV88E6123_61_65 + bool "Marvell 88E6123/6161/6165 ethernet switch chip support" + select NET_DSA_MV88E6XXX + select NET_DSA_TAG_EDSA + ---help--- + This enables support for the Marvell 88E6123/6161/6165 + ethernet switch chips. + +endif diff --git a/net/dsa/Makefile b/net/dsa/Makefile new file mode 100644 index 000000000000..2374faff4dea --- /dev/null +++ b/net/dsa/Makefile @@ -0,0 +1,13 @@ +# tagging formats +obj-$(CONFIG_NET_DSA_TAG_DSA) += tag_dsa.o +obj-$(CONFIG_NET_DSA_TAG_EDSA) += tag_edsa.o +obj-$(CONFIG_NET_DSA_TAG_TRAILER) += tag_trailer.o + +# switch drivers +obj-$(CONFIG_NET_DSA_MV88E6XXX) += mv88e6xxx.o +obj-$(CONFIG_NET_DSA_MV88E6060) += mv88e6060.o +obj-$(CONFIG_NET_DSA_MV88E6123_61_65) += mv88e6123_61_65.o +obj-$(CONFIG_NET_DSA_MV88E6131) += mv88e6131.o + +# the core +obj-$(CONFIG_NET_DSA) += dsa.o slave.o diff --git a/net/dsa/dsa.c b/net/dsa/dsa.c new file mode 100644 index 000000000000..33e99462023a --- /dev/null +++ b/net/dsa/dsa.c @@ -0,0 +1,392 @@ +/* + * net/dsa/dsa.c - Hardware switch handling + * Copyright (c) 2008 Marvell Semiconductor + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + */ + +#include <linux/list.h> +#include <linux/netdevice.h> +#include <linux/platform_device.h> +#include <net/dsa.h> +#include "dsa_priv.h" + +char dsa_driver_version[] = "0.1"; + + +/* switch driver registration ***********************************************/ +static DEFINE_MUTEX(dsa_switch_drivers_mutex); +static LIST_HEAD(dsa_switch_drivers); + +void register_switch_driver(struct dsa_switch_driver *drv) +{ + mutex_lock(&dsa_switch_drivers_mutex); + list_add_tail(&drv->list, &dsa_switch_drivers); + mutex_unlock(&dsa_switch_drivers_mutex); +} + +void unregister_switch_driver(struct dsa_switch_driver *drv) +{ + mutex_lock(&dsa_switch_drivers_mutex); + list_del_init(&drv->list); + mutex_unlock(&dsa_switch_drivers_mutex); +} + +static struct dsa_switch_driver * +dsa_switch_probe(struct mii_bus *bus, int sw_addr, char **_name) +{ + struct dsa_switch_driver *ret; + struct list_head *list; + char *name; + + ret = NULL; + name = NULL; + + mutex_lock(&dsa_switch_drivers_mutex); + list_for_each(list, &dsa_switch_drivers) { + struct dsa_switch_driver *drv; + + drv = list_entry(list, struct dsa_switch_driver, list); + + name = drv->probe(bus, sw_addr); + if (name != NULL) { + ret = drv; + break; + } + } + mutex_unlock(&dsa_switch_drivers_mutex); + + *_name = name; + + return ret; +} + + +/* basic switch operations **************************************************/ +static struct dsa_switch * +dsa_switch_setup(struct device *parent, struct dsa_platform_data *pd, + struct mii_bus *bus, struct net_device *dev) +{ + struct dsa_switch *ds; + int ret; + struct dsa_switch_driver *drv; + char *name; + int i; + + /* + * Probe for switch model. + */ + drv = dsa_switch_probe(bus, pd->sw_addr, &name); + if (drv == NULL) { + printk(KERN_ERR "%s: could not detect attached switch\n", + dev->name); + return ERR_PTR(-EINVAL); + } + printk(KERN_INFO "%s: detected a %s switch\n", dev->name, name); + + + /* + * Allocate and initialise switch state. + */ + ds = kzalloc(sizeof(*ds) + drv->priv_size, GFP_KERNEL); + if (ds == NULL) + return ERR_PTR(-ENOMEM); + + ds->pd = pd; + ds->master_netdev = dev; + ds->master_mii_bus = bus; + + ds->drv = drv; + ds->tag_protocol = drv->tag_protocol; + + + /* + * Validate supplied switch configuration. + */ + ds->cpu_port = -1; + for (i = 0; i < DSA_MAX_PORTS; i++) { + char *name; + + name = pd->port_names[i]; + if (name == NULL) + continue; + + if (!strcmp(name, "cpu")) { + if (ds->cpu_port != -1) { + printk(KERN_ERR "multiple cpu ports?!\n"); + ret = -EINVAL; + goto out; + } + ds->cpu_port = i; + } else { + ds->valid_port_mask |= 1 << i; + } + } + + if (ds->cpu_port == -1) { + printk(KERN_ERR "no cpu port?!\n"); + ret = -EINVAL; + goto out; + } + + + /* + * If we use a tagging format that doesn't have an ethertype + * field, make sure that all packets from this point on get + * sent to the tag format's receive function. (Which will + * discard received packets until we set ds->ports[] below.) + */ + wmb(); + dev->dsa_ptr = (void *)ds; + + + /* + * Do basic register setup. + */ + ret = drv->setup(ds); + if (ret < 0) + goto out; + + ret = drv->set_addr(ds, dev->dev_addr); + if (ret < 0) + goto out; + + ds->slave_mii_bus = mdiobus_alloc(); + if (ds->slave_mii_bus == NULL) { + ret = -ENOMEM; + goto out; + } + dsa_slave_mii_bus_init(ds); + + ret = mdiobus_register(ds->slave_mii_bus); + if (ret < 0) + goto out_free; + + + /* + * Create network devices for physical switch ports. + */ + wmb(); + for (i = 0; i < DSA_MAX_PORTS; i++) { + struct net_device *slave_dev; + + if (!(ds->valid_port_mask & (1 << i))) + continue; + + slave_dev = dsa_slave_create(ds, parent, i, pd->port_names[i]); + if (slave_dev == NULL) { + printk(KERN_ERR "%s: can't create dsa slave " + "device for port %d(%s)\n", + dev->name, i, pd->port_names[i]); + continue; + } + + ds->ports[i] = slave_dev; + } + + return ds; + +out_free: + mdiobus_free(ds->slave_mii_bus); +out: + dev->dsa_ptr = NULL; + kfree(ds); + return ERR_PTR(ret); +} + +static void dsa_switch_destroy(struct dsa_switch *ds) +{ +} + + +/* hooks for ethertype-less tagging formats *********************************/ +/* + * The original DSA tag format and some other tag formats have no + * ethertype, which means that we need to add a little hack to the + * networking receive path to make sure that received frames get + * the right ->protocol assigned to them when one of those tag + * formats is in use. + */ +bool dsa_uses_dsa_tags(void *dsa_ptr) +{ + struct dsa_switch *ds = dsa_ptr; + + return !!(ds->tag_protocol == htons(ETH_P_DSA)); +} + +bool dsa_uses_trailer_tags(void *dsa_ptr) +{ + struct dsa_switch *ds = dsa_ptr; + + return !!(ds->tag_protocol == htons(ETH_P_TRAILER)); +} + + +/* link polling *************************************************************/ +static void dsa_link_poll_work(struct work_struct *ugly) +{ + struct dsa_switch *ds; + + ds = container_of(ugly, struct dsa_switch, link_poll_work); + + ds->drv->poll_link(ds); + mod_timer(&ds->link_poll_timer, round_jiffies(jiffies + HZ)); +} + +static void dsa_link_poll_timer(unsigned long _ds) +{ + struct dsa_switch *ds = (void *)_ds; + + schedule_work(&ds->link_poll_work); +} + + +/* platform driver init and cleanup *****************************************/ +static int dev_is_class(struct device *dev, void *class) +{ + if (dev->class != NULL && !strcmp(dev->class->name, class)) + return 1; + + return 0; +} + +static struct device *dev_find_class(struct device *parent, char *class) +{ + if (dev_is_class(parent, class)) { + get_device(parent); + return parent; + } + + return device_find_child(parent, class, dev_is_class); +} + +static struct mii_bus *dev_to_mii_bus(struct device *dev) +{ + struct device *d; + + d = dev_find_class(dev, "mdio_bus"); + if (d != NULL) { + struct mii_bus *bus; + + bus = to_mii_bus(d); + put_device(d); + + return bus; + } + + return NULL; +} + +static struct net_device *dev_to_net_device(struct device *dev) +{ + struct device *d; + + d = dev_find_class(dev, "net"); + if (d != NULL) { + struct net_device *nd; + + nd = to_net_dev(d); + dev_hold(nd); + put_device(d); + + return nd; + } + + return NULL; +} + +static int dsa_probe(struct platform_device *pdev) +{ + static int dsa_version_printed; + struct dsa_platform_data *pd = pdev->dev.platform_data; + struct net_device *dev; + struct mii_bus *bus; + struct dsa_switch *ds; + + if (!dsa_version_printed++) + printk(KERN_NOTICE "Distributed Switch Architecture " + "driver version %s\n", dsa_driver_version); + + if (pd == NULL || pd->mii_bus == NULL || pd->netdev == NULL) + return -EINVAL; + + bus = dev_to_mii_bus(pd->mii_bus); + if (bus == NULL) + return -EINVAL; + + dev = dev_to_net_device(pd->netdev); + if (dev == NULL) + return -EINVAL; + + if (dev->dsa_ptr != NULL) { + dev_put(dev); + return -EEXIST; + } + + ds = dsa_switch_setup(&pdev->dev, pd, bus, dev); + if (IS_ERR(ds)) { + dev_put(dev); + return PTR_ERR(ds); + } + + if (ds->drv->poll_link != NULL) { + INIT_WORK(&ds->link_poll_work, dsa_link_poll_work); + init_timer(&ds->link_poll_timer); + ds->link_poll_timer.data = (unsigned long)ds; + ds->link_poll_timer.function = dsa_link_poll_timer; + ds->link_poll_timer.expires = round_jiffies(jiffies + HZ); + add_timer(&ds->link_poll_timer); + } + + platform_set_drvdata(pdev, ds); + + return 0; +} + +static int dsa_remove(struct platform_device *pdev) +{ + struct dsa_switch *ds = platform_get_drvdata(pdev); + + if (ds->drv->poll_link != NULL) + del_timer_sync(&ds->link_poll_timer); + + flush_scheduled_work(); + + dsa_switch_destroy(ds); + + return 0; +} + +static void dsa_shutdown(struct platform_device *pdev) +{ +} + +static struct platform_driver dsa_driver = { + .probe = dsa_probe, + .remove = dsa_remove, + .shutdown = dsa_shutdown, + .driver = { + .name = "dsa", + .owner = THIS_MODULE, + }, +}; + +static int __init dsa_init_module(void) +{ + return platform_driver_register(&dsa_driver); +} +module_init(dsa_init_module); + +static void __exit dsa_cleanup_module(void) +{ + platform_driver_unregister(&dsa_driver); +} +module_exit(dsa_cleanup_module); + +MODULE_AUTHOR("Lennert Buytenhek <buytenh@wantstofly.org>") +MODULE_DESCRIPTION("Driver for Distributed Switch Architecture switch chips"); +MODULE_LICENSE("GPL"); +MODULE_ALIAS("platform:dsa"); diff --git a/net/dsa/dsa_priv.h b/net/dsa/dsa_priv.h new file mode 100644 index 000000000000..7063378a1ebf --- /dev/null +++ b/net/dsa/dsa_priv.h @@ -0,0 +1,116 @@ +/* + * net/dsa/dsa_priv.h - Hardware switch handling + * Copyright (c) 2008 Marvell Semiconductor + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + */ + +#ifndef __DSA_PRIV_H +#define __DSA_PRIV_H + +#include <linux/list.h> +#include <linux/phy.h> +#include <linux/timer.h> +#include <linux/workqueue.h> +#include <net/dsa.h> + +struct dsa_switch { + /* + * Configuration data for the platform device that owns + * this dsa switch instance. + */ + struct dsa_platform_data *pd; + + /* + * References to network device and mii bus to use. + */ + struct net_device *master_netdev; + struct mii_bus *master_mii_bus; + + /* + * The used switch driver and frame tagging type. + */ + struct dsa_switch_driver *drv; + __be16 tag_protocol; + + /* + * Slave mii_bus and devices for the individual ports. + */ + int cpu_port; + u32 valid_port_mask; + struct mii_bus *slave_mii_bus; + struct net_device *ports[DSA_MAX_PORTS]; + + /* + * Link state polling. + */ + struct work_struct link_poll_work; + struct timer_list link_poll_timer; +}; + +struct dsa_slave_priv { + struct net_device *dev; + struct dsa_switch *parent; + int port; + struct phy_device *phy; +}; + +struct dsa_switch_driver { + struct list_head list; + + __be16 tag_protocol; + int priv_size; + + /* + * Probing and setup. + */ + char *(*probe)(struct mii_bus *bus, int sw_addr); + int (*setup)(struct dsa_switch *ds); + int (*set_addr)(struct dsa_switch *ds, u8 *addr); + + /* + * Access to the switch's PHY registers. + */ + int (*phy_read)(struct dsa_switch *ds, int port, int regnum); + int (*phy_write)(struct dsa_switch *ds, int port, + int regnum, u16 val); + + /* + * Link state polling and IRQ handling. + */ + void (*poll_link)(struct dsa_switch *ds); + + /* + * ethtool hardware statistics. + */ + void (*get_strings)(struct dsa_switch *ds, int port, uint8_t *data); + void (*get_ethtool_stats)(struct dsa_switch *ds, + int port, uint64_t *data); + int (*get_sset_count)(struct dsa_switch *ds); +}; + +/* dsa.c */ +extern char dsa_driver_version[]; +void register_switch_driver(struct dsa_switch_driver *type); +void unregister_switch_driver(struct dsa_switch_driver *type); + +/* slave.c */ +void dsa_slave_mii_bus_init(struct dsa_switch *ds); +struct net_device *dsa_slave_create(struct dsa_switch *ds, + struct device *parent, + int port, char *name); + +/* tag_dsa.c */ +int dsa_xmit(struct sk_buff *skb, struct net_device *dev); + +/* tag_edsa.c */ +int edsa_xmit(struct sk_buff *skb, struct net_device *dev); + +/* tag_trailer.c */ +int trailer_xmit(struct sk_buff *skb, struct net_device *dev); + + +#endif diff --git a/net/dsa/mv88e6060.c b/net/dsa/mv88e6060.c new file mode 100644 index 000000000000..54068ef251e8 --- /dev/null +++ b/net/dsa/mv88e6060.c @@ -0,0 +1,287 @@ +/* + * net/dsa/mv88e6060.c - Driver for Marvell 88e6060 switch chips + * Copyright (c) 2008 Marvell Semiconductor + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + */ + +#include <linux/list.h> +#include <linux/netdevice.h> +#include <linux/phy.h> +#include "dsa_priv.h" + +#define REG_PORT(p) (8 + (p)) +#define REG_GLOBAL 0x0f + +static int reg_read(struct dsa_switch *ds, int addr, int reg) +{ + return mdiobus_read(ds->master_mii_bus, addr, reg); +} + +#define REG_READ(addr, reg) \ + ({ \ + int __ret; \ + \ + __ret = reg_read(ds, addr, reg); \ + if (__ret < 0) \ + return __ret; \ + __ret; \ + }) + + +static int reg_write(struct dsa_switch *ds, int addr, int reg, u16 val) +{ + return mdiobus_write(ds->master_mii_bus, addr, reg, val); +} + +#define REG_WRITE(addr, reg, val) \ + ({ \ + int __ret; \ + \ + __ret = reg_write(ds, addr, reg, val); \ + if (__ret < 0) \ + return __ret; \ + }) + +static char *mv88e6060_probe(struct mii_bus *bus, int sw_addr) +{ + int ret; + + ret = mdiobus_read(bus, REG_PORT(0), 0x03); + if (ret >= 0) { + ret &= 0xfff0; + if (ret == 0x0600) + return "Marvell 88E6060"; + } + + return NULL; +} + +static int mv88e6060_switch_reset(struct dsa_switch *ds) +{ + int i; + int ret; + + /* + * Set all ports to the disabled state. + */ + for (i = 0; i < 6; i++) { + ret = REG_READ(REG_PORT(i), 0x04); + REG_WRITE(REG_PORT(i), 0x04, ret & 0xfffc); + } + + /* + * Wait for transmit queues to drain. + */ + msleep(2); + + /* + * Reset the switch. + */ + REG_WRITE(REG_GLOBAL, 0x0A, 0xa130); + + /* + * Wait up to one second for reset to complete. + */ + for (i = 0; i < 1000; i++) { + ret = REG_READ(REG_GLOBAL, 0x00); + if ((ret & 0x8000) == 0x0000) + break; + + msleep(1); + } + if (i == 1000) + return -ETIMEDOUT; + + return 0; +} + +static int mv88e6060_setup_global(struct dsa_switch *ds) +{ + /* + * Disable discarding of frames with excessive collisions, + * set the maximum frame size to 1536 bytes, and mask all + * interrupt sources. + */ + REG_WRITE(REG_GLOBAL, 0x04, 0x0800); + + /* + * Enable automatic address learning, set the address + * database size to 1024 entries, and set the default aging + * time to 5 minutes. + */ + REG_WRITE(REG_GLOBAL, 0x0a, 0x2130); + + return 0; +} + +static int mv88e6060_setup_port(struct dsa_switch *ds, int p) +{ + int addr = REG_PORT(p); + + /* + * Do not force flow control, disable Ingress and Egress + * Header tagging, disable VLAN tunneling, and set the port + * state to Forwarding. Additionally, if this is the CPU + * port, enable Ingress and Egress Trailer tagging mode. + */ + REG_WRITE(addr, 0x04, (p == ds->cpu_port) ? 0x4103 : 0x0003); + + /* + * Port based VLAN map: give each port its own address + * database, allow the CPU port to talk to each of the 'real' + * ports, and allow each of the 'real' ports to only talk to + * the CPU port. + */ + REG_WRITE(addr, 0x06, + ((p & 0xf) << 12) | + ((p == ds->cpu_port) ? + ds->valid_port_mask : + (1 << ds->cpu_port))); + + /* + * Port Association Vector: when learning source addresses + * of packets, add the address to the address database using + * a port bitmap that has only the bit for this port set and + * the other bits clear. + */ + REG_WRITE(addr, 0x0b, 1 << p); + + return 0; +} + +static int mv88e6060_setup(struct dsa_switch *ds) +{ + int i; + int ret; + + ret = mv88e6060_switch_reset(ds); + if (ret < 0) + return ret; + + /* @@@ initialise atu */ + + ret = mv88e6060_setup_global(ds); + if (ret < 0) + return ret; + + for (i = 0; i < 6; i++) { + ret = mv88e6060_setup_port(ds, i); + if (ret < 0) + return ret; + } + + return 0; +} + +static int mv88e6060_set_addr(struct dsa_switch *ds, u8 *addr) +{ + REG_WRITE(REG_GLOBAL, 0x01, (addr[0] << 8) | addr[1]); + REG_WRITE(REG_GLOBAL, 0x02, (addr[2] << 8) | addr[3]); + REG_WRITE(REG_GLOBAL, 0x03, (addr[4] << 8) | addr[5]); + + return 0; +} + +static int mv88e6060_port_to_phy_addr(int port) +{ + if (port >= 0 && port <= 5) + return port; + return -1; +} + +static int mv88e6060_phy_read(struct dsa_switch *ds, int port, int regnum) +{ + int addr; + + addr = mv88e6060_port_to_phy_addr(port); + if (addr == -1) + return 0xffff; + + return reg_read(ds, addr, regnum); +} + +static int +mv88e6060_phy_write(struct dsa_switch *ds, int port, int regnum, u16 val) +{ + int addr; + + addr = mv88e6060_port_to_phy_addr(port); + if (addr == -1) + return 0xffff; + + return reg_write(ds, addr, regnum, val); +} + +static void mv88e6060_poll_link(struct dsa_switch *ds) +{ + int i; + + for (i = 0; i < DSA_MAX_PORTS; i++) { + struct net_device *dev; + int port_status; + int link; + int speed; + int duplex; + int fc; + + dev = ds->ports[i]; + if (dev == NULL) + continue; + + link = 0; + if (dev->flags & IFF_UP) { + port_status = reg_read(ds, REG_PORT(i), 0x00); + if (port_status < 0) + continue; + + link = !!(port_status & 0x1000); + } + + if (!link) { + if (netif_carrier_ok(dev)) { + printk(KERN_INFO "%s: link down\n", dev->name); + netif_carrier_off(dev); + } + continue; + } + + speed = (port_status & 0x0100) ? 100 : 10; + duplex = (port_status & 0x0200) ? 1 : 0; + fc = ((port_status & 0xc000) == 0xc000) ? 1 : 0; + + if (!netif_carrier_ok(dev)) { + printk(KERN_INFO "%s: link up, %d Mb/s, %s duplex, " + "flow control %sabled\n", dev->name, + speed, duplex ? "full" : "half", + fc ? "en" : "dis"); + netif_carrier_on(dev); + } + } +} + +static struct dsa_switch_driver mv88e6060_switch_driver = { + .tag_protocol = htons(ETH_P_TRAILER), + .probe = mv88e6060_probe, + .setup = mv88e6060_setup, + .set_addr = mv88e6060_set_addr, + .phy_read = mv88e6060_phy_read, + .phy_write = mv88e6060_phy_write, + .poll_link = mv88e6060_poll_link, +}; + +int __init mv88e6060_init(void) +{ + register_switch_driver(&mv88e6060_switch_driver); + return 0; +} +module_init(mv88e6060_init); + +void __exit mv88e6060_cleanup(void) +{ + unregister_switch_driver(&mv88e6060_switch_driver); +} +module_exit(mv88e6060_cleanup); diff --git a/net/dsa/mv88e6123_61_65.c b/net/dsa/mv88e6123_61_65.c new file mode 100644 index 000000000000..555b164082fc --- /dev/null +++ b/net/dsa/mv88e6123_61_65.c @@ -0,0 +1,421 @@ +/* + * net/dsa/mv88e6123_61_65.c - Marvell 88e6123/6161/6165 switch chip support + * Copyright (c) 2008 Marvell Semiconductor + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + */ + +#include <linux/list.h> +#include <linux/netdevice.h> +#include <linux/phy.h> +#include "dsa_priv.h" +#include "mv88e6xxx.h" + +static char *mv88e6123_61_65_probe(struct mii_bus *bus, int sw_addr) +{ + int ret; + + ret = __mv88e6xxx_reg_read(bus, sw_addr, REG_PORT(0), 0x03); + if (ret >= 0) { + ret &= 0xfff0; + if (ret == 0x1210) + return "Marvell 88E6123"; + if (ret == 0x1610) + return "Marvell 88E6161"; + if (ret == 0x1650) + return "Marvell 88E6165"; + } + + return NULL; +} + +static int mv88e6123_61_65_switch_reset(struct dsa_switch *ds) +{ + int i; + int ret; + + /* + * Set all ports to the disabled state. + */ + for (i = 0; i < 8; i++) { + ret = REG_READ(REG_PORT(i), 0x04); + REG_WRITE(REG_PORT(i), 0x04, ret & 0xfffc); + } + + /* + * Wait for transmit queues to drain. + */ + msleep(2); + + /* + * Reset the switch. + */ + REG_WRITE(REG_GLOBAL, 0x04, 0xc400); + + /* + * Wait up to one second for reset to complete. + */ + for (i = 0; i < 1000; i++) { + ret = REG_READ(REG_GLOBAL, 0x00); + if ((ret & 0xc800) == 0xc800) + break; + + msleep(1); + } + if (i == 1000) + return -ETIMEDOUT; + + return 0; +} + +static int mv88e6123_61_65_setup_global(struct dsa_switch *ds) +{ + int ret; + int i; + + /* + * Disable the PHY polling unit (since there won't be any + * external PHYs to poll), don't discard packets with + * excessive collisions, and mask all interrupt sources. + */ + REG_WRITE(REG_GLOBAL, 0x04, 0x0000); + + /* + * Set the default address aging time to 5 minutes, and + * enable address learn messages to be sent to all message + * ports. + */ + REG_WRITE(REG_GLOBAL, 0x0a, 0x0148); + + /* + * Configure the priority mapping registers. + */ + ret = mv88e6xxx_config_prio(ds); + if (ret < 0) + return ret; + + /* + * Configure the cpu port, and configure the cpu port as the + * port to which ingress and egress monitor frames are to be + * sent. + */ + REG_WRITE(REG_GLOBAL, 0x1a, (ds->cpu_port * 0x1110)); + + /* + * Disable remote management for now, and set the switch's + * DSA device number to zero. + */ + REG_WRITE(REG_GLOBAL, 0x1c, 0x0000); + + /* + * Send all frames with destination addresses matching + * 01:80:c2:00:00:2x to the CPU port. + */ + REG_WRITE(REG_GLOBAL2, 0x02, 0xffff); + + /* + * Send all frames with destination addresses matching + * 01:80:c2:00:00:0x to the CPU port. + */ + REG_WRITE(REG_GLOBAL2, 0x03, 0xffff); + + /* + * Disable the loopback filter, disable flow control + * messages, disable flood broadcast override, disable + * removing of provider tags, disable ATU age violation + * interrupts, disable tag flow control, force flow + * control priority to the highest, and send all special + * multicast frames to the CPU at the highest priority. + */ + REG_WRITE(REG_GLOBAL2, 0x05, 0x00ff); + + /* + * Map all DSA device IDs to the CPU port. + */ + for (i = 0; i < 32; i++) + REG_WRITE(REG_GLOBAL2, 0x06, 0x8000 | (i << 8) | ds->cpu_port); + + /* + * Clear all trunk masks. + */ + for (i = 0; i < 8; i++) + REG_WRITE(REG_GLOBAL2, 0x07, 0x8000 | (i << 12) | 0xff); + + /* + * Clear all trunk mappings. + */ + for (i = 0; i < 16; i++) + REG_WRITE(REG_GLOBAL2, 0x08, 0x8000 | (i << 11)); + + /* + * Disable ingress rate limiting by resetting all ingress + * rate limit registers to their initial state. + */ + for (i = 0; i < 6; i++) + REG_WRITE(REG_GLOBAL2, 0x09, 0x9000 | (i << 8)); + + /* + * Initialise cross-chip port VLAN table to reset defaults. + */ + REG_WRITE(REG_GLOBAL2, 0x0b, 0x9000); + + /* + * Clear the priority override table. + */ + for (i = 0; i < 16; i++) + REG_WRITE(REG_GLOBAL2, 0x0f, 0x8000 | (i << 8)); + + /* @@@ initialise AVB (22/23) watchdog (27) sdet (29) registers */ + + return 0; +} + +static int mv88e6123_61_65_setup_port(struct dsa_switch *ds, int p) +{ + int addr = REG_PORT(p); + + /* + * MAC Forcing register: don't force link, speed, duplex + * or flow control state to any particular values. + */ + REG_WRITE(addr, 0x01, 0x0003); + + /* + * Do not limit the period of time that this port can be + * paused for by the remote end or the period of time that + * this port can pause the remote end. + */ + REG_WRITE(addr, 0x02, 0x0000); + + /* + * Port Control: disable Drop-on-Unlock, disable Drop-on-Lock, + * configure the requested (DSA/EDSA) tagging mode if this is + * the CPU port, disable Header mode, enable IGMP/MLD snooping, + * disable VLAN tunneling, determine priority by looking at + * 802.1p and IP priority fields (IP prio has precedence), and + * set STP state to Forwarding. Finally, if this is the CPU + * port, additionally enable forwarding of unknown unicast and + * multicast addresses. + */ + REG_WRITE(addr, 0x04, + (p == ds->cpu_port) ? + (ds->tag_protocol == htons(ETH_P_DSA)) ? + 0x053f : 0x373f : + 0x0433); + + /* + * Port Control 1: disable trunking. Also, if this is the + * CPU port, enable learn messages to be sent to this port. + */ + REG_WRITE(addr, 0x05, (p == ds->cpu_port) ? 0x8000 : 0x0000); + + /* + * Port based VLAN map: give each port its own address + * database, allow the CPU port to talk to each of the 'real' + * ports, and allow each of the 'real' ports to only talk to + * the CPU port. + */ + REG_WRITE(addr, 0x06, + ((p & 0xf) << 12) | + ((p == ds->cpu_port) ? + ds->valid_port_mask : + (1 << ds->cpu_port))); + + /* + * Default VLAN ID and priority: don't set a default VLAN + * ID, and set the default packet priority to zero. + */ + REG_WRITE(addr, 0x07, 0x0000); + + /* + * Port Control 2: don't force a good FCS, set the maximum + * frame size to 10240 bytes, don't let the switch add or + * strip 802.1q tags, don't discard tagged or untagged frames + * on this port, do a destination address lookup on all + * received packets as usual, disable ARP mirroring and don't + * send a copy of all transmitted/received frames on this port + * to the CPU. + */ + REG_WRITE(addr, 0x08, 0x2080); + + /* + * Egress rate control: disable egress rate control. + */ + REG_WRITE(addr, 0x09, 0x0001); + + /* + * Egress rate control 2: disable egress rate control. + */ + REG_WRITE(addr, 0x0a, 0x0000); + + /* + * Port Association Vector: when learning source addresses + * of packets, add the address to the address database using + * a port bitmap that has only the bit for this port set and + * the other bits clear. + */ + REG_WRITE(addr, 0x0b, 1 << p); + + /* + * Port ATU control: disable limiting the number of address + * database entries that this port is allowed to use. + */ + REG_WRITE(addr, 0x0c, 0x0000); + + /* + * Priorit Override: disable DA, SA and VTU priority override. + */ + REG_WRITE(addr, 0x0d, 0x0000); + + /* + * Port Ethertype: use the Ethertype DSA Ethertype value. + */ + REG_WRITE(addr, 0x0f, ETH_P_EDSA); + + /* + * Tag Remap: use an identity 802.1p prio -> switch prio + * mapping. + */ + REG_WRITE(addr, 0x18, 0x3210); + + /* + * Tag Remap 2: use an identity 802.1p prio -> switch prio + * mapping. + */ + REG_WRITE(addr, 0x19, 0x7654); + + return 0; +} + +static int mv88e6123_61_65_setup(struct dsa_switch *ds) +{ + struct mv88e6xxx_priv_state *ps = (void *)(ds + 1); + int i; + int ret; + + mutex_init(&ps->smi_mutex); + mutex_init(&ps->stats_mutex); + + ret = mv88e6123_61_65_switch_reset(ds); + if (ret < 0) + return ret; + + /* @@@ initialise vtu and atu */ + + ret = mv88e6123_61_65_setup_global(ds); + if (ret < 0) + return ret; + + for (i = 0; i < 6; i++) { + ret = mv88e6123_61_65_setup_port(ds, i); + if (ret < 0) + return ret; + } + + return 0; +} + +static int mv88e6123_61_65_port_to_phy_addr(int port) +{ + if (port >= 0 && port <= 4) + return port; + return -1; +} + +static int +mv88e6123_61_65_phy_read(struct dsa_switch *ds, int port, int regnum) +{ + int addr = mv88e6123_61_65_port_to_phy_addr(port); + return mv88e6xxx_phy_read(ds, addr, regnum); +} + +static int +mv88e6123_61_65_phy_write(struct dsa_switch *ds, + int port, int regnum, u16 val) +{ + int addr = mv88e6123_61_65_port_to_phy_addr(port); + return mv88e6xxx_phy_write(ds, addr, regnum, val); +} + +static struct mv88e6xxx_hw_stat mv88e6123_61_65_hw_stats[] = { + { "in_good_octets", 8, 0x00, }, + { "in_bad_octets", 4, 0x02, }, + { "in_unicast", 4, 0x04, }, + { "in_broadcasts", 4, 0x06, }, + { "in_multicasts", 4, 0x07, }, + { "in_pause", 4, 0x16, }, + { "in_undersize", 4, 0x18, }, + { "in_fragments", 4, 0x19, }, + { "in_oversize", 4, 0x1a, }, + { "in_jabber", 4, 0x1b, }, + { "in_rx_error", 4, 0x1c, }, + { "in_fcs_error", 4, 0x1d, }, + { "out_octets", 8, 0x0e, }, + { "out_unicast", 4, 0x10, }, + { "out_broadcasts", 4, 0x13, }, + { "out_multicasts", 4, 0x12, }, + { "out_pause", 4, 0x15, }, + { "excessive", 4, 0x11, }, + { "collisions", 4, 0x1e, }, + { "deferred", 4, 0x05, }, + { "single", 4, 0x14, }, + { "multiple", 4, 0x17, }, + { "out_fcs_error", 4, 0x03, }, + { "late", 4, 0x1f, }, + { "hist_64bytes", 4, 0x08, }, + { "hist_65_127bytes", 4, 0x09, }, + { "hist_128_255bytes", 4, 0x0a, }, + { "hist_256_511bytes", 4, 0x0b, }, + { "hist_512_1023bytes", 4, 0x0c, }, + { "hist_1024_max_bytes", 4, 0x0d, }, +}; + +static void +mv88e6123_61_65_get_strings(struct dsa_switch *ds, int port, uint8_t *data) +{ + mv88e6xxx_get_strings(ds, ARRAY_SIZE(mv88e6123_61_65_hw_stats), + mv88e6123_61_65_hw_stats, port, data); +} + +static void +mv88e6123_61_65_get_ethtool_stats(struct dsa_switch *ds, + int port, uint64_t *data) +{ + mv88e6xxx_get_ethtool_stats(ds, ARRAY_SIZE(mv88e6123_61_65_hw_stats), + mv88e6123_61_65_hw_stats, port, data); +} + +static int mv88e6123_61_65_get_sset_count(struct dsa_switch *ds) +{ + return ARRAY_SIZE(mv88e6123_61_65_hw_stats); +} + +static struct dsa_switch_driver mv88e6123_61_65_switch_driver = { + .tag_protocol = __constant_htons(ETH_P_EDSA), + .priv_size = sizeof(struct mv88e6xxx_priv_state), + .probe = mv88e6123_61_65_probe, + .setup = mv88e6123_61_65_setup, + .set_addr = mv88e6xxx_set_addr_indirect, + .phy_read = mv88e6123_61_65_phy_read, + .phy_write = mv88e6123_61_65_phy_write, + .poll_link = mv88e6xxx_poll_link, + .get_strings = mv88e6123_61_65_get_strings, + .get_ethtool_stats = mv88e6123_61_65_get_ethtool_stats, + .get_sset_count = mv88e6123_61_65_get_sset_count, +}; + +int __init mv88e6123_61_65_init(void) +{ + register_switch_driver(&mv88e6123_61_65_switch_driver); + return 0; +} +module_init(mv88e6123_61_65_init); + +void __exit mv88e6123_61_65_cleanup(void) +{ + unregister_switch_driver(&mv88e6123_61_65_switch_driver); +} +module_exit(mv88e6123_61_65_cleanup); diff --git a/net/dsa/mv88e6131.c b/net/dsa/mv88e6131.c new file mode 100644 index 000000000000..36e01eb863a0 --- /dev/null +++ b/net/dsa/mv88e6131.c @@ -0,0 +1,380 @@ +/* + * net/dsa/mv88e6131.c - Marvell 88e6131 switch chip support + * Copyright (c) 2008 Marvell Semiconductor + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + */ + +#include <linux/list.h> +#include <linux/netdevice.h> +#include <linux/phy.h> +#include "dsa_priv.h" +#include "mv88e6xxx.h" + +static char *mv88e6131_probe(struct mii_bus *bus, int sw_addr) +{ + int ret; + + ret = __mv88e6xxx_reg_read(bus, sw_addr, REG_PORT(0), 0x03); + if (ret >= 0) { + ret &= 0xfff0; + if (ret == 0x1060) + return "Marvell 88E6131"; + } + + return NULL; +} + +static int mv88e6131_switch_reset(struct dsa_switch *ds) +{ + int i; + int ret; + + /* + * Set all ports to the disabled state. + */ + for (i = 0; i < 8; i++) { + ret = REG_READ(REG_PORT(i), 0x04); + REG_WRITE(REG_PORT(i), 0x04, ret & 0xfffc); + } + + /* + * Wait for transmit queues to drain. + */ + msleep(2); + + /* + * Reset the switch. + */ + REG_WRITE(REG_GLOBAL, 0x04, 0xc400); + + /* + * Wait up to one second for reset to complete. + */ + for (i = 0; i < 1000; i++) { + ret = REG_READ(REG_GLOBAL, 0x00); + if ((ret & 0xc800) == 0xc800) + break; + + msleep(1); + } + if (i == 1000) + return -ETIMEDOUT; + + return 0; +} + +static int mv88e6131_setup_global(struct dsa_switch *ds) +{ + int ret; + int i; + + /* + * Enable the PHY polling unit, don't discard packets with + * excessive collisions, use a weighted fair queueing scheme + * to arbitrate between packet queues, set the maximum frame + * size to 1632, and mask all interrupt sources. + */ + REG_WRITE(REG_GLOBAL, 0x04, 0x4400); + + /* + * Set the default address aging time to 5 minutes, and + * enable address learn messages to be sent to all message + * ports. + */ + REG_WRITE(REG_GLOBAL, 0x0a, 0x0148); + + /* + * Configure the priority mapping registers. + */ + ret = mv88e6xxx_config_prio(ds); + if (ret < 0) + return ret; + + /* + * Set the VLAN ethertype to 0x8100. + */ + REG_WRITE(REG_GLOBAL, 0x19, 0x8100); + + /* + * Disable ARP mirroring, and configure the cpu port as the + * port to which ingress and egress monitor frames are to be + * sent. + */ + REG_WRITE(REG_GLOBAL, 0x1a, (ds->cpu_port * 0x1100) | 0x00f0); + + /* + * Disable cascade port functionality, and set the switch's + * DSA device number to zero. + */ + REG_WRITE(REG_GLOBAL, 0x1c, 0xe000); + + /* + * Send all frames with destination addresses matching + * 01:80:c2:00:00:0x to the CPU port. + */ + REG_WRITE(REG_GLOBAL2, 0x03, 0xffff); + + /* + * Ignore removed tag data on doubly tagged packets, disable + * flow control messages, force flow control priority to the + * highest, and send all special multicast frames to the CPU + * port at the higest priority. + */ + REG_WRITE(REG_GLOBAL2, 0x05, 0x00ff); + + /* + * Map all DSA device IDs to the CPU port. + */ + for (i = 0; i < 32; i++) + REG_WRITE(REG_GLOBAL2, 0x06, 0x8000 | (i << 8) | ds->cpu_port); + + /* + * Clear all trunk masks. + */ + for (i = 0; i < 8; i++) + REG_WRITE(REG_GLOBAL2, 0x07, 0x8000 | (i << 12) | 0xff); + + /* + * Clear all trunk mappings. + */ + for (i = 0; i < 16; i++) + REG_WRITE(REG_GLOBAL2, 0x08, 0x8000 | (i << 11)); + + /* + * Force the priority of IGMP/MLD snoop frames and ARP frames + * to the highest setting. + */ + REG_WRITE(REG_GLOBAL2, 0x0f, 0x00ff); + + return 0; +} + +static int mv88e6131_setup_port(struct dsa_switch *ds, int p) +{ + int addr = REG_PORT(p); + + /* + * MAC Forcing register: don't force link, speed, duplex + * or flow control state to any particular values. + */ + REG_WRITE(addr, 0x01, 0x0003); + + /* + * Port Control: disable Core Tag, disable Drop-on-Lock, + * transmit frames unmodified, disable Header mode, + * enable IGMP/MLD snoop, disable DoubleTag, disable VLAN + * tunneling, determine priority by looking at 802.1p and + * IP priority fields (IP prio has precedence), and set STP + * state to Forwarding. Finally, if this is the CPU port, + * additionally enable DSA tagging and forwarding of unknown + * unicast addresses. + */ + REG_WRITE(addr, 0x04, (p == ds->cpu_port) ? 0x0537 : 0x0433); + + /* + * Port Control 1: disable trunking. Also, if this is the + * CPU port, enable learn messages to be sent to this port. + */ + REG_WRITE(addr, 0x05, (p == ds->cpu_port) ? 0x8000 : 0x0000); + + /* + * Port based VLAN map: give each port its own address + * database, allow the CPU port to talk to each of the 'real' + * ports, and allow each of the 'real' ports to only talk to + * the CPU port. + */ + REG_WRITE(addr, 0x06, + ((p & 0xf) << 12) | + ((p == ds->cpu_port) ? + ds->valid_port_mask : + (1 << ds->cpu_port))); + + /* + * Default VLAN ID and priority: don't set a default VLAN + * ID, and set the default packet priority to zero. + */ + REG_WRITE(addr, 0x07, 0x0000); + + /* + * Port Control 2: don't force a good FCS, don't use + * VLAN-based, source address-based or destination + * address-based priority overrides, don't let the switch + * add or strip 802.1q tags, don't discard tagged or + * untagged frames on this port, do a destination address + * lookup on received packets as usual, don't send a copy + * of all transmitted/received frames on this port to the + * CPU, and configure the CPU port number. Also, if this + * is the CPU port, enable forwarding of unknown multicast + * addresses. + */ + REG_WRITE(addr, 0x08, + ((p == ds->cpu_port) ? 0x00c0 : 0x0080) | + ds->cpu_port); + + /* + * Rate Control: disable ingress rate limiting. + */ + REG_WRITE(addr, 0x09, 0x0000); + + /* + * Rate Control 2: disable egress rate limiting. + */ + REG_WRITE(addr, 0x0a, 0x0000); + + /* + * Port Association Vector: when learning source addresses + * of packets, add the address to the address database using + * a port bitmap that has only the bit for this port set and + * the other bits clear. + */ + REG_WRITE(addr, 0x0b, 1 << p); + + /* + * Tag Remap: use an identity 802.1p prio -> switch prio + * mapping. + */ + REG_WRITE(addr, 0x18, 0x3210); + + /* + * Tag Remap 2: use an identity 802.1p prio -> switch prio + * mapping. + */ + REG_WRITE(addr, 0x19, 0x7654); + + return 0; +} + +static int mv88e6131_setup(struct dsa_switch *ds) +{ + struct mv88e6xxx_priv_state *ps = (void *)(ds + 1); + int i; + int ret; + + mutex_init(&ps->smi_mutex); + mv88e6xxx_ppu_state_init(ds); + mutex_init(&ps->stats_mutex); + + ret = mv88e6131_switch_reset(ds); + if (ret < 0) + return ret; + + /* @@@ initialise vtu and atu */ + + ret = mv88e6131_setup_global(ds); + if (ret < 0) + return ret; + + for (i = 0; i < 6; i++) { + ret = mv88e6131_setup_port(ds, i); + if (ret < 0) + return ret; + } + + return 0; +} + +static int mv88e6131_port_to_phy_addr(int port) +{ + if (port >= 0 && port != 3 && port <= 7) + return port; + return -1; +} + +static int +mv88e6131_phy_read(struct dsa_switch *ds, int port, int regnum) +{ + int addr = mv88e6131_port_to_phy_addr(port); + return mv88e6xxx_phy_read_ppu(ds, addr, regnum); +} + +static int +mv88e6131_phy_write(struct dsa_switch *ds, + int port, int regnum, u16 val) +{ + int addr = mv88e6131_port_to_phy_addr(port); + return mv88e6xxx_phy_write_ppu(ds, addr, regnum, val); +} + +static struct mv88e6xxx_hw_stat mv88e6131_hw_stats[] = { + { "in_good_octets", 8, 0x00, }, + { "in_bad_octets", 4, 0x02, }, + { "in_unicast", 4, 0x04, }, + { "in_broadcasts", 4, 0x06, }, + { "in_multicasts", 4, 0x07, }, + { "in_pause", 4, 0x16, }, + { "in_undersize", 4, 0x18, }, + { "in_fragments", 4, 0x19, }, + { "in_oversize", 4, 0x1a, }, + { "in_jabber", 4, 0x1b, }, + { "in_rx_error", 4, 0x1c, }, + { "in_fcs_error", 4, 0x1d, }, + { "out_octets", 8, 0x0e, }, + { "out_unicast", 4, 0x10, }, + { "out_broadcasts", 4, 0x13, }, + { "out_multicasts", 4, 0x12, }, + { "out_pause", 4, 0x15, }, + { "excessive", 4, 0x11, }, + { "collisions", 4, 0x1e, }, + { "deferred", 4, 0x05, }, + { "single", 4, 0x14, }, + { "multiple", 4, 0x17, }, + { "out_fcs_error", 4, 0x03, }, + { "late", 4, 0x1f, }, + { "hist_64bytes", 4, 0x08, }, + { "hist_65_127bytes", 4, 0x09, }, + { "hist_128_255bytes", 4, 0x0a, }, + { "hist_256_511bytes", 4, 0x0b, }, + { "hist_512_1023bytes", 4, 0x0c, }, + { "hist_1024_max_bytes", 4, 0x0d, }, +}; + +static void +mv88e6131_get_strings(struct dsa_switch *ds, int port, uint8_t *data) +{ + mv88e6xxx_get_strings(ds, ARRAY_SIZE(mv88e6131_hw_stats), + mv88e6131_hw_stats, port, data); +} + +static void +mv88e6131_get_ethtool_stats(struct dsa_switch *ds, + int port, uint64_t *data) +{ + mv88e6xxx_get_ethtool_stats(ds, ARRAY_SIZE(mv88e6131_hw_stats), + mv88e6131_hw_stats, port, data); +} + +static int mv88e6131_get_sset_count(struct dsa_switch *ds) +{ + return ARRAY_SIZE(mv88e6131_hw_stats); +} + +static struct dsa_switch_driver mv88e6131_switch_driver = { + .tag_protocol = __constant_htons(ETH_P_DSA), + .priv_size = sizeof(struct mv88e6xxx_priv_state), + .probe = mv88e6131_probe, + .setup = mv88e6131_setup, + .set_addr = mv88e6xxx_set_addr_direct, + .phy_read = mv88e6131_phy_read, + .phy_write = mv88e6131_phy_write, + .poll_link = mv88e6xxx_poll_link, + .get_strings = mv88e6131_get_strings, + .get_ethtool_stats = mv88e6131_get_ethtool_stats, + .get_sset_count = mv88e6131_get_sset_count, +}; + +int __init mv88e6131_init(void) +{ + register_switch_driver(&mv88e6131_switch_driver); + return 0; +} +module_init(mv88e6131_init); + +void __exit mv88e6131_cleanup(void) +{ + unregister_switch_driver(&mv88e6131_switch_driver); +} +module_exit(mv88e6131_cleanup); diff --git a/net/dsa/mv88e6xxx.c b/net/dsa/mv88e6xxx.c new file mode 100644 index 000000000000..aa6c609c59f2 --- /dev/null +++ b/net/dsa/mv88e6xxx.c @@ -0,0 +1,522 @@ +/* + * net/dsa/mv88e6xxx.c - Marvell 88e6xxx switch chip support + * Copyright (c) 2008 Marvell Semiconductor + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + */ + +#include <linux/list.h> +#include <linux/netdevice.h> +#include <linux/phy.h> +#include "dsa_priv.h" +#include "mv88e6xxx.h" + +/* + * If the switch's ADDR[4:0] strap pins are strapped to zero, it will + * use all 32 SMI bus addresses on its SMI bus, and all switch registers + * will be directly accessible on some {device address,register address} + * pair. If the ADDR[4:0] pins are not strapped to zero, the switch + * will only respond to SMI transactions to that specific address, and + * an indirect addressing mechanism needs to be used to access its + * registers. + */ +static int mv88e6xxx_reg_wait_ready(struct mii_bus *bus, int sw_addr) +{ + int ret; + int i; + + for (i = 0; i < 16; i++) { + ret = mdiobus_read(bus, sw_addr, 0); + if (ret < 0) + return ret; + + if ((ret & 0x8000) == 0) + return 0; + } + + return -ETIMEDOUT; +} + +int __mv88e6xxx_reg_read(struct mii_bus *bus, int sw_addr, int addr, int reg) +{ + int ret; + + if (sw_addr == 0) + return mdiobus_read(bus, addr, reg); + + /* + * Wait for the bus to become free. + */ + ret = mv88e6xxx_reg_wait_ready(bus, sw_addr); + if (ret < 0) + return ret; + + /* + * Transmit the read command. + */ + ret = mdiobus_write(bus, sw_addr, 0, 0x9800 | (addr << 5) | reg); + if (ret < 0) + return ret; + + /* + * Wait for the read command to complete. + */ + ret = mv88e6xxx_reg_wait_ready(bus, sw_addr); + if (ret < 0) + return ret; + + /* + * Read the data. + */ + ret = mdiobus_read(bus, sw_addr, 1); + if (ret < 0) + return ret; + + return ret & 0xffff; +} + +int mv88e6xxx_reg_read(struct dsa_switch *ds, int addr, int reg) +{ + struct mv88e6xxx_priv_state *ps = (void *)(ds + 1); + int ret; + + mutex_lock(&ps->smi_mutex); + ret = __mv88e6xxx_reg_read(ds->master_mii_bus, + ds->pd->sw_addr, addr, reg); + mutex_unlock(&ps->smi_mutex); + + return ret; +} + +int __mv88e6xxx_reg_write(struct mii_bus *bus, int sw_addr, int addr, + int reg, u16 val) +{ + int ret; + + if (sw_addr == 0) + return mdiobus_write(bus, addr, reg, val); + + /* + * Wait for the bus to become free. + */ + ret = mv88e6xxx_reg_wait_ready(bus, sw_addr); + if (ret < 0) + return ret; + + /* + * Transmit the data to write. + */ + ret = mdiobus_write(bus, sw_addr, 1, val); + if (ret < 0) + return ret; + + /* + * Transmit the write command. + */ + ret = mdiobus_write(bus, sw_addr, 0, 0x9400 | (addr << 5) | reg); + if (ret < 0) + return ret; + + /* + * Wait for the write command to complete. + */ + ret = mv88e6xxx_reg_wait_ready(bus, sw_addr); + if (ret < 0) + return ret; + + return 0; +} + +int mv88e6xxx_reg_write(struct dsa_switch *ds, int addr, int reg, u16 val) +{ + struct mv88e6xxx_priv_state *ps = (void *)(ds + 1); + int ret; + + mutex_lock(&ps->smi_mutex); + ret = __mv88e6xxx_reg_write(ds->master_mii_bus, + ds->pd->sw_addr, addr, reg, val); + mutex_unlock(&ps->smi_mutex); + + return ret; +} + +int mv88e6xxx_config_prio(struct dsa_switch *ds) +{ + /* + * Configure the IP ToS mapping registers. + */ + REG_WRITE(REG_GLOBAL, 0x10, 0x0000); + REG_WRITE(REG_GLOBAL, 0x11, 0x0000); + REG_WRITE(REG_GLOBAL, 0x12, 0x5555); + REG_WRITE(REG_GLOBAL, 0x13, 0x5555); + REG_WRITE(REG_GLOBAL, 0x14, 0xaaaa); + REG_WRITE(REG_GLOBAL, 0x15, 0xaaaa); + REG_WRITE(REG_GLOBAL, 0x16, 0xffff); + REG_WRITE(REG_GLOBAL, 0x17, 0xffff); + + /* + * Configure the IEEE 802.1p priority mapping register. + */ + REG_WRITE(REG_GLOBAL, 0x18, 0xfa41); + + return 0; +} + +int mv88e6xxx_set_addr_direct(struct dsa_switch *ds, u8 *addr) +{ + REG_WRITE(REG_GLOBAL, 0x01, (addr[0] << 8) | addr[1]); + REG_WRITE(REG_GLOBAL, 0x02, (addr[2] << 8) | addr[3]); + REG_WRITE(REG_GLOBAL, 0x03, (addr[4] << 8) | addr[5]); + + return 0; +} + +int mv88e6xxx_set_addr_indirect(struct dsa_switch *ds, u8 *addr) +{ + int i; + int ret; + + for (i = 0; i < 6; i++) { + int j; + + /* + * Write the MAC address byte. + */ + REG_WRITE(REG_GLOBAL2, 0x0d, 0x8000 | (i << 8) | addr[i]); + + /* + * Wait for the write to complete. + */ + for (j = 0; j < 16; j++) { + ret = REG_READ(REG_GLOBAL2, 0x0d); + if ((ret & 0x8000) == 0) + break; + } + if (j == 16) + return -ETIMEDOUT; + } + + return 0; +} + +int mv88e6xxx_phy_read(struct dsa_switch *ds, int addr, int regnum) +{ + if (addr >= 0) + return mv88e6xxx_reg_read(ds, addr, regnum); + return 0xffff; +} + +int mv88e6xxx_phy_write(struct dsa_switch *ds, int addr, int regnum, u16 val) +{ + if (addr >= 0) + return mv88e6xxx_reg_write(ds, addr, regnum, val); + return 0; +} + +#ifdef CONFIG_NET_DSA_MV88E6XXX_NEED_PPU +static int mv88e6xxx_ppu_disable(struct dsa_switch *ds) +{ + int ret; + int i; + + ret = REG_READ(REG_GLOBAL, 0x04); + REG_WRITE(REG_GLOBAL, 0x04, ret & ~0x4000); + + for (i = 0; i < 1000; i++) { + ret = REG_READ(REG_GLOBAL, 0x00); + msleep(1); + if ((ret & 0xc000) != 0xc000) + return 0; + } + + return -ETIMEDOUT; +} + +static int mv88e6xxx_ppu_enable(struct dsa_switch *ds) +{ + int ret; + int i; + + ret = REG_READ(REG_GLOBAL, 0x04); + REG_WRITE(REG_GLOBAL, 0x04, ret | 0x4000); + + for (i = 0; i < 1000; i++) { + ret = REG_READ(REG_GLOBAL, 0x00); + msleep(1); + if ((ret & 0xc000) == 0xc000) + return 0; + } + + return -ETIMEDOUT; +} + +static void mv88e6xxx_ppu_reenable_work(struct work_struct *ugly) +{ + struct mv88e6xxx_priv_state *ps; + + ps = container_of(ugly, struct mv88e6xxx_priv_state, ppu_work); + if (mutex_trylock(&ps->ppu_mutex)) { + struct dsa_switch *ds = ((struct dsa_switch *)ps) - 1; + + if (mv88e6xxx_ppu_enable(ds) == 0) + ps->ppu_disabled = 0; + mutex_unlock(&ps->ppu_mutex); + } +} + +static void mv88e6xxx_ppu_reenable_timer(unsigned long _ps) +{ + struct mv88e6xxx_priv_state *ps = (void *)_ps; + + schedule_work(&ps->ppu_work); +} + +static int mv88e6xxx_ppu_access_get(struct dsa_switch *ds) +{ + struct mv88e6xxx_priv_state *ps = (void *)(ds + 1); + int ret; + + mutex_lock(&ps->ppu_mutex); + + /* + * If the PHY polling unit is enabled, disable it so that + * we can access the PHY registers. If it was already + * disabled, cancel the timer that is going to re-enable + * it. + */ + if (!ps->ppu_disabled) { + ret = mv88e6xxx_ppu_disable(ds); + if (ret < 0) { + mutex_unlock(&ps->ppu_mutex); + return ret; + } + ps->ppu_disabled = 1; + } else { + del_timer(&ps->ppu_timer); + ret = 0; + } + + return ret; +} + +static void mv88e6xxx_ppu_access_put(struct dsa_switch *ds) +{ + struct mv88e6xxx_priv_state *ps = (void *)(ds + 1); + + /* + * Schedule a timer to re-enable the PHY polling unit. + */ + mod_timer(&ps->ppu_timer, jiffies + msecs_to_jiffies(10)); + mutex_unlock(&ps->ppu_mutex); +} + +void mv88e6xxx_ppu_state_init(struct dsa_switch *ds) +{ + struct mv88e6xxx_priv_state *ps = (void *)(ds + 1); + + mutex_init(&ps->ppu_mutex); + INIT_WORK(&ps->ppu_work, mv88e6xxx_ppu_reenable_work); + init_timer(&ps->ppu_timer); + ps->ppu_timer.data = (unsigned long)ps; + ps->ppu_timer.function = mv88e6xxx_ppu_reenable_timer; +} + +int mv88e6xxx_phy_read_ppu(struct dsa_switch *ds, int addr, int regnum) +{ + int ret; + + ret = mv88e6xxx_ppu_access_get(ds); + if (ret >= 0) { + ret = mv88e6xxx_reg_read(ds, addr, regnum); + mv88e6xxx_ppu_access_put(ds); + } + + return ret; +} + +int mv88e6xxx_phy_write_ppu(struct dsa_switch *ds, int addr, + int regnum, u16 val) +{ + int ret; + + ret = mv88e6xxx_ppu_access_get(ds); + if (ret >= 0) { + ret = mv88e6xxx_reg_write(ds, addr, regnum, val); + mv88e6xxx_ppu_access_put(ds); + } + + return ret; +} +#endif + +void mv88e6xxx_poll_link(struct dsa_switch *ds) +{ + int i; + + for (i = 0; i < DSA_MAX_PORTS; i++) { + struct net_device *dev; + int port_status; + int link; + int speed; + int duplex; + int fc; + + dev = ds->ports[i]; + if (dev == NULL) + continue; + + link = 0; + if (dev->flags & IFF_UP) { + port_status = mv88e6xxx_reg_read(ds, REG_PORT(i), 0x00); + if (port_status < 0) + continue; + + link = !!(port_status & 0x0800); + } + + if (!link) { + if (netif_carrier_ok(dev)) { + printk(KERN_INFO "%s: link down\n", dev->name); + netif_carrier_off(dev); + } + continue; + } + + switch (port_status & 0x0300) { + case 0x0000: + speed = 10; + break; + case 0x0100: + speed = 100; + break; + case 0x0200: + speed = 1000; + break; + default: + speed = -1; + break; + } + duplex = (port_status & 0x0400) ? 1 : 0; + fc = (port_status & 0x8000) ? 1 : 0; + + if (!netif_carrier_ok(dev)) { + printk(KERN_INFO "%s: link up, %d Mb/s, %s duplex, " + "flow control %sabled\n", dev->name, + speed, duplex ? "full" : "half", + fc ? "en" : "dis"); + netif_carrier_on(dev); + } + } +} + +static int mv88e6xxx_stats_wait(struct dsa_switch *ds) +{ + int ret; + int i; + + for (i = 0; i < 10; i++) { + ret = REG_READ(REG_GLOBAL2, 0x1d); + if ((ret & 0x8000) == 0) + return 0; + } + + return -ETIMEDOUT; +} + +static int mv88e6xxx_stats_snapshot(struct dsa_switch *ds, int port) +{ + int ret; + + /* + * Snapshot the hardware statistics counters for this port. + */ + REG_WRITE(REG_GLOBAL, 0x1d, 0xdc00 | port); + + /* + * Wait for the snapshotting to complete. + */ + ret = mv88e6xxx_stats_wait(ds); + if (ret < 0) + return ret; + + return 0; +} + +static void mv88e6xxx_stats_read(struct dsa_switch *ds, int stat, u32 *val) +{ + u32 _val; + int ret; + + *val = 0; + + ret = mv88e6xxx_reg_write(ds, REG_GLOBAL, 0x1d, 0xcc00 | stat); + if (ret < 0) + return; + + ret = mv88e6xxx_stats_wait(ds); + if (ret < 0) + return; + + ret = mv88e6xxx_reg_read(ds, REG_GLOBAL, 0x1e); + if (ret < 0) + return; + + _val = ret << 16; + + ret = mv88e6xxx_reg_read(ds, REG_GLOBAL, 0x1f); + if (ret < 0) + return; + + *val = _val | ret; +} + +void mv88e6xxx_get_strings(struct dsa_switch *ds, + int nr_stats, struct mv88e6xxx_hw_stat *stats, + int port, uint8_t *data) +{ + int i; + + for (i = 0; i < nr_stats; i++) { + memcpy(data + i * ETH_GSTRING_LEN, + stats[i].string, ETH_GSTRING_LEN); + } +} + +void mv88e6xxx_get_ethtool_stats(struct dsa_switch *ds, + int nr_stats, struct mv88e6xxx_hw_stat *stats, + int port, uint64_t *data) +{ + struct mv88e6xxx_priv_state *ps = (void *)(ds + 1); + int ret; + int i; + + mutex_lock(&ps->stats_mutex); + + ret = mv88e6xxx_stats_snapshot(ds, port); + if (ret < 0) { + mutex_unlock(&ps->stats_mutex); + return; + } + + /* + * Read each of the counters. + */ + for (i = 0; i < nr_stats; i++) { + struct mv88e6xxx_hw_stat *s = stats + i; + u32 low; + u32 high; + + mv88e6xxx_stats_read(ds, s->reg, &low); + if (s->sizeof_stat == 8) + mv88e6xxx_stats_read(ds, s->reg + 1, &high); + else + high = 0; + + data[i] = (((u64)high) << 32) | low; + } + + mutex_unlock(&ps->stats_mutex); +} diff --git a/net/dsa/mv88e6xxx.h b/net/dsa/mv88e6xxx.h new file mode 100644 index 000000000000..eb0e0aaa9f1b --- /dev/null +++ b/net/dsa/mv88e6xxx.h @@ -0,0 +1,93 @@ +/* + * net/dsa/mv88e6xxx.h - Marvell 88e6xxx switch chip support + * Copyright (c) 2008 Marvell Semiconductor + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + */ + +#ifndef __MV88E6XXX_H +#define __MV88E6XXX_H + +#define REG_PORT(p) (0x10 + (p)) +#define REG_GLOBAL 0x1b +#define REG_GLOBAL2 0x1c + +struct mv88e6xxx_priv_state { + /* + * When using multi-chip addressing, this mutex protects + * access to the indirect access registers. (In single-chip + * mode, this mutex is effectively useless.) + */ + struct mutex smi_mutex; + +#ifdef CONFIG_NET_DSA_MV88E6XXX_NEED_PPU + /* + * Handles automatic disabling and re-enabling of the PHY + * polling unit. + */ + struct mutex ppu_mutex; + int ppu_disabled; + struct work_struct ppu_work; + struct timer_list ppu_timer; +#endif + + /* + * This mutex serialises access to the statistics unit. + * Hold this mutex over snapshot + dump sequences. + */ + struct mutex stats_mutex; +}; + +struct mv88e6xxx_hw_stat { + char string[ETH_GSTRING_LEN]; + int sizeof_stat; + int reg; +}; + +int __mv88e6xxx_reg_read(struct mii_bus *bus, int sw_addr, int addr, int reg); +int mv88e6xxx_reg_read(struct dsa_switch *ds, int addr, int reg); +int __mv88e6xxx_reg_write(struct mii_bus *bus, int sw_addr, int addr, + int reg, u16 val); +int mv88e6xxx_reg_write(struct dsa_switch *ds, int addr, int reg, u16 val); +int mv88e6xxx_config_prio(struct dsa_switch *ds); +int mv88e6xxx_set_addr_direct(struct dsa_switch *ds, u8 *addr); +int mv88e6xxx_set_addr_indirect(struct dsa_switch *ds, u8 *addr); +int mv88e6xxx_phy_read(struct dsa_switch *ds, int addr, int regnum); +int mv88e6xxx_phy_write(struct dsa_switch *ds, int addr, int regnum, u16 val); +void mv88e6xxx_ppu_state_init(struct dsa_switch *ds); +int mv88e6xxx_phy_read_ppu(struct dsa_switch *ds, int addr, int regnum); +int mv88e6xxx_phy_write_ppu(struct dsa_switch *ds, int addr, + int regnum, u16 val); +void mv88e6xxx_poll_link(struct dsa_switch *ds); +void mv88e6xxx_get_strings(struct dsa_switch *ds, + int nr_stats, struct mv88e6xxx_hw_stat *stats, + int port, uint8_t *data); +void mv88e6xxx_get_ethtool_stats(struct dsa_switch *ds, + int nr_stats, struct mv88e6xxx_hw_stat *stats, + int port, uint64_t *data); + +#define REG_READ(addr, reg) \ + ({ \ + int __ret; \ + \ + __ret = mv88e6xxx_reg_read(ds, addr, reg); \ + if (__ret < 0) \ + return __ret; \ + __ret; \ + }) + +#define REG_WRITE(addr, reg, val) \ + ({ \ + int __ret; \ + \ + __ret = mv88e6xxx_reg_write(ds, addr, reg, val); \ + if (__ret < 0) \ + return __ret; \ + }) + + + +#endif diff --git a/net/dsa/slave.c b/net/dsa/slave.c new file mode 100644 index 000000000000..37616884b8a9 --- /dev/null +++ b/net/dsa/slave.c @@ -0,0 +1,298 @@ +/* + * net/dsa/slave.c - Slave device handling + * Copyright (c) 2008 Marvell Semiconductor + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + */ + +#include <linux/list.h> +#include <linux/netdevice.h> +#include <linux/phy.h> +#include "dsa_priv.h" + +/* slave mii_bus handling ***************************************************/ +static int dsa_slave_phy_read(struct mii_bus *bus, int addr, int reg) +{ + struct dsa_switch *ds = bus->priv; + + if (ds->valid_port_mask & (1 << addr)) + return ds->drv->phy_read(ds, addr, reg); + + return 0xffff; +} + +static int dsa_slave_phy_write(struct mii_bus *bus, int addr, int reg, u16 val) +{ + struct dsa_switch *ds = bus->priv; + + if (ds->valid_port_mask & (1 << addr)) + return ds->drv->phy_write(ds, addr, reg, val); + + return 0; +} + +void dsa_slave_mii_bus_init(struct dsa_switch *ds) +{ + ds->slave_mii_bus->priv = (void *)ds; + ds->slave_mii_bus->name = "dsa slave smi"; + ds->slave_mii_bus->read = dsa_slave_phy_read; + ds->slave_mii_bus->write = dsa_slave_phy_write; + snprintf(ds->slave_mii_bus->id, MII_BUS_ID_SIZE, "%s:%.2x", + ds->master_mii_bus->id, ds->pd->sw_addr); + ds->slave_mii_bus->parent = &(ds->master_mii_bus->dev); +} + + +/* slave device handling ****************************************************/ +static int dsa_slave_open(struct net_device *dev) +{ + return 0; +} + +static int dsa_slave_close(struct net_device *dev) +{ + return 0; +} + +static void dsa_slave_change_rx_flags(struct net_device *dev, int change) +{ + struct dsa_slave_priv *p = netdev_priv(dev); + struct net_device *master = p->parent->master_netdev; + + if (change & IFF_ALLMULTI) + dev_set_allmulti(master, dev->flags & IFF_ALLMULTI ? 1 : -1); + if (change & IFF_PROMISC) + dev_set_promiscuity(master, dev->flags & IFF_PROMISC ? 1 : -1); +} + +static void dsa_slave_set_rx_mode(struct net_device *dev) +{ + struct dsa_slave_priv *p = netdev_priv(dev); + struct net_device *master = p->parent->master_netdev; + + dev_mc_sync(master, dev); + dev_unicast_sync(master, dev); +} + +static int dsa_slave_set_mac_address(struct net_device *dev, void *addr) +{ + memcpy(dev->dev_addr, addr + 2, 6); + + return 0; +} + +static int dsa_slave_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd) +{ + struct dsa_slave_priv *p = netdev_priv(dev); + struct mii_ioctl_data *mii_data = if_mii(ifr); + + if (p->phy != NULL) + return phy_mii_ioctl(p->phy, mii_data, cmd); + + return -EOPNOTSUPP; +} + + +/* ethtool operations *******************************************************/ +static int +dsa_slave_get_settings(struct net_device *dev, struct ethtool_cmd *cmd) +{ + struct dsa_slave_priv *p = netdev_priv(dev); + int err; + + err = -EOPNOTSUPP; + if (p->phy != NULL) { + err = phy_read_status(p->phy); + if (err == 0) + err = phy_ethtool_gset(p->phy, cmd); + } + + return err; +} + +static int +dsa_slave_set_settings(struct net_device *dev, struct ethtool_cmd *cmd) +{ + struct dsa_slave_priv *p = netdev_priv(dev); + + if (p->phy != NULL) + return phy_ethtool_sset(p->phy, cmd); + + return -EOPNOTSUPP; +} + +static void dsa_slave_get_drvinfo(struct net_device *dev, + struct ethtool_drvinfo *drvinfo) +{ + strncpy(drvinfo->driver, "dsa", 32); + strncpy(drvinfo->version, dsa_driver_version, 32); + strncpy(drvinfo->fw_version, "N/A", 32); + strncpy(drvinfo->bus_info, "platform", 32); +} + +static int dsa_slave_nway_reset(struct net_device *dev) +{ + struct dsa_slave_priv *p = netdev_priv(dev); + + if (p->phy != NULL) + return genphy_restart_aneg(p->phy); + + return -EOPNOTSUPP; +} + +static u32 dsa_slave_get_link(struct net_device *dev) +{ + struct dsa_slave_priv *p = netdev_priv(dev); + + if (p->phy != NULL) { + genphy_update_link(p->phy); + return p->phy->link; + } + + return -EOPNOTSUPP; +} + +static void dsa_slave_get_strings(struct net_device *dev, + uint32_t stringset, uint8_t *data) +{ + struct dsa_slave_priv *p = netdev_priv(dev); + struct dsa_switch *ds = p->parent; + + if (stringset == ETH_SS_STATS) { + int len = ETH_GSTRING_LEN; + + strncpy(data, "tx_packets", len); + strncpy(data + len, "tx_bytes", len); + strncpy(data + 2 * len, "rx_packets", len); + strncpy(data + 3 * len, "rx_bytes", len); + if (ds->drv->get_strings != NULL) + ds->drv->get_strings(ds, p->port, data + 4 * len); + } +} + +static void dsa_slave_get_ethtool_stats(struct net_device *dev, + struct ethtool_stats *stats, + uint64_t *data) +{ + struct dsa_slave_priv *p = netdev_priv(dev); + struct dsa_switch *ds = p->parent; + + data[0] = p->dev->stats.tx_packets; + data[1] = p->dev->stats.tx_bytes; + data[2] = p->dev->stats.rx_packets; + data[3] = p->dev->stats.rx_bytes; + if (ds->drv->get_ethtool_stats != NULL) + ds->drv->get_ethtool_stats(ds, p->port, data + 4); +} + +static int dsa_slave_get_sset_count(struct net_device *dev, int sset) +{ + struct dsa_slave_priv *p = netdev_priv(dev); + struct dsa_switch *ds = p->parent; + + if (sset == ETH_SS_STATS) { + int count; + + count = 4; + if (ds->drv->get_sset_count != NULL) + count += ds->drv->get_sset_count(ds); + + return count; + } + + return -EOPNOTSUPP; +} + +static const struct ethtool_ops dsa_slave_ethtool_ops = { + .get_settings = dsa_slave_get_settings, + .set_settings = dsa_slave_set_settings, + .get_drvinfo = dsa_slave_get_drvinfo, + .nway_reset = dsa_slave_nway_reset, + .get_link = dsa_slave_get_link, + .set_sg = ethtool_op_set_sg, + .get_strings = dsa_slave_get_strings, + .get_ethtool_stats = dsa_slave_get_ethtool_stats, + .get_sset_count = dsa_slave_get_sset_count, +}; + + +/* slave device setup *******************************************************/ +struct net_device * +dsa_slave_create(struct dsa_switch *ds, struct device *parent, + int port, char *name) +{ + struct net_device *master = ds->master_netdev; + struct net_device *slave_dev; + struct dsa_slave_priv *p; + int ret; + + slave_dev = alloc_netdev(sizeof(struct dsa_slave_priv), + name, ether_setup); + if (slave_dev == NULL) + return slave_dev; + + slave_dev->features = master->vlan_features; + SET_ETHTOOL_OPS(slave_dev, &dsa_slave_ethtool_ops); + memcpy(slave_dev->dev_addr, master->dev_addr, ETH_ALEN); + slave_dev->tx_queue_len = 0; + switch (ds->tag_protocol) { +#ifdef CONFIG_NET_DSA_TAG_DSA + case htons(ETH_P_DSA): + slave_dev->hard_start_xmit = dsa_xmit; + break; +#endif +#ifdef CONFIG_NET_DSA_TAG_EDSA + case htons(ETH_P_EDSA): + slave_dev->hard_start_xmit = edsa_xmit; + break; +#endif +#ifdef CONFIG_NET_DSA_TAG_TRAILER + case htons(ETH_P_TRAILER): + slave_dev->hard_start_xmit = trailer_xmit; + break; +#endif + default: + BUG(); + } + slave_dev->open = dsa_slave_open; + slave_dev->stop = dsa_slave_close; + slave_dev->change_rx_flags = dsa_slave_change_rx_flags; + slave_dev->set_rx_mode = dsa_slave_set_rx_mode; + slave_dev->set_multicast_list = dsa_slave_set_rx_mode; + slave_dev->set_mac_address = dsa_slave_set_mac_address; + slave_dev->do_ioctl = dsa_slave_ioctl; + SET_NETDEV_DEV(slave_dev, parent); + slave_dev->vlan_features = master->vlan_features; + + p = netdev_priv(slave_dev); + p->dev = slave_dev; + p->parent = ds; + p->port = port; + p->phy = ds->slave_mii_bus->phy_map[port]; + + ret = register_netdev(slave_dev); + if (ret) { + printk(KERN_ERR "%s: error %d registering interface %s\n", + master->name, ret, slave_dev->name); + free_netdev(slave_dev); + return NULL; + } + + netif_carrier_off(slave_dev); + + if (p->phy != NULL) { + phy_attach(slave_dev, p->phy->dev.bus_id, + 0, PHY_INTERFACE_MODE_GMII); + + p->phy->autoneg = AUTONEG_ENABLE; + p->phy->speed = 0; + p->phy->duplex = 0; + p->phy->advertising = p->phy->supported | ADVERTISED_Autoneg; + phy_start_aneg(p->phy); + } + + return slave_dev; +} diff --git a/net/dsa/tag_dsa.c b/net/dsa/tag_dsa.c new file mode 100644 index 000000000000..bdc0510b53b7 --- /dev/null +++ b/net/dsa/tag_dsa.c @@ -0,0 +1,194 @@ +/* + * net/dsa/tag_dsa.c - (Non-ethertype) DSA tagging + * Copyright (c) 2008 Marvell Semiconductor + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + */ + +#include <linux/etherdevice.h> +#include <linux/list.h> +#include <linux/netdevice.h> +#include "dsa_priv.h" + +#define DSA_HLEN 4 + +int dsa_xmit(struct sk_buff *skb, struct net_device *dev) +{ + struct dsa_slave_priv *p = netdev_priv(dev); + u8 *dsa_header; + + dev->stats.tx_packets++; + dev->stats.tx_bytes += skb->len; + + /* + * Convert the outermost 802.1q tag to a DSA tag for tagged + * packets, or insert a DSA tag between the addresses and + * the ethertype field for untagged packets. + */ + if (skb->protocol == htons(ETH_P_8021Q)) { + if (skb_cow_head(skb, 0) < 0) + goto out_free; + + /* + * Construct tagged FROM_CPU DSA tag from 802.1q tag. + */ + dsa_header = skb->data + 2 * ETH_ALEN; + dsa_header[0] = 0x60; + dsa_header[1] = p->port << 3; + + /* + * Move CFI field from byte 2 to byte 1. + */ + if (dsa_header[2] & 0x10) { + dsa_header[1] |= 0x01; + dsa_header[2] &= ~0x10; + } + } else { + if (skb_cow_head(skb, DSA_HLEN) < 0) + goto out_free; + skb_push(skb, DSA_HLEN); + + memmove(skb->data, skb->data + DSA_HLEN, 2 * ETH_ALEN); + + /* + * Construct untagged FROM_CPU DSA tag. + */ + dsa_header = skb->data + 2 * ETH_ALEN; + dsa_header[0] = 0x40; + dsa_header[1] = p->port << 3; + dsa_header[2] = 0x00; + dsa_header[3] = 0x00; + } + + skb->protocol = htons(ETH_P_DSA); + + skb->dev = p->parent->master_netdev; + dev_queue_xmit(skb); + + return NETDEV_TX_OK; + +out_free: + kfree_skb(skb); + return NETDEV_TX_OK; +} + +static int dsa_rcv(struct sk_buff *skb, struct net_device *dev, + struct packet_type *pt, struct net_device *orig_dev) +{ + struct dsa_switch *ds = dev->dsa_ptr; + u8 *dsa_header; + int source_port; + + if (unlikely(ds == NULL)) + goto out_drop; + + skb = skb_unshare(skb, GFP_ATOMIC); + if (skb == NULL) + goto out; + + if (unlikely(!pskb_may_pull(skb, DSA_HLEN))) + goto out_drop; + + /* + * The ethertype field is part of the DSA header. + */ + dsa_header = skb->data - 2; + + /* + * Check that frame type is either TO_CPU or FORWARD, and + * that the source device is zero. + */ + if ((dsa_header[0] & 0xdf) != 0x00 && (dsa_header[0] & 0xdf) != 0xc0) + goto out_drop; + + /* + * Check that the source port is a registered DSA port. + */ + source_port = (dsa_header[1] >> 3) & 0x1f; + if (source_port >= DSA_MAX_PORTS || ds->ports[source_port] == NULL) + goto out_drop; + + /* + * Convert the DSA header to an 802.1q header if the 'tagged' + * bit in the DSA header is set. If the 'tagged' bit is clear, + * delete the DSA header entirely. + */ + if (dsa_header[0] & 0x20) { + u8 new_header[4]; + + /* + * Insert 802.1q ethertype and copy the VLAN-related + * fields, but clear the bit that will hold CFI (since + * DSA uses that bit location for another purpose). + */ + new_header[0] = (ETH_P_8021Q >> 8) & 0xff; + new_header[1] = ETH_P_8021Q & 0xff; + new_header[2] = dsa_header[2] & ~0x10; + new_header[3] = dsa_header[3]; + + /* + * Move CFI bit from its place in the DSA header to + * its 802.1q-designated place. + */ + if (dsa_header[1] & 0x01) + new_header[2] |= 0x10; + + /* + * Update packet checksum if skb is CHECKSUM_COMPLETE. + */ + if (skb->ip_summed == CHECKSUM_COMPLETE) { + __wsum c = skb->csum; + c = csum_add(c, csum_partial(new_header + 2, 2, 0)); + c = csum_sub(c, csum_partial(dsa_header + 2, 2, 0)); + skb->csum = c; + } + + memcpy(dsa_header, new_header, DSA_HLEN); + } else { + /* + * Remove DSA tag and update checksum. + */ + skb_pull_rcsum(skb, DSA_HLEN); + memmove(skb->data - ETH_HLEN, + skb->data - ETH_HLEN - DSA_HLEN, + 2 * ETH_ALEN); + } + + skb->dev = ds->ports[source_port]; + skb_push(skb, ETH_HLEN); + skb->protocol = eth_type_trans(skb, skb->dev); + + skb->dev->last_rx = jiffies; + skb->dev->stats.rx_packets++; + skb->dev->stats.rx_bytes += skb->len; + + netif_receive_skb(skb); + + return 0; + +out_drop: + kfree_skb(skb); +out: + return 0; +} + +static struct packet_type dsa_packet_type = { + .type = __constant_htons(ETH_P_DSA), + .func = dsa_rcv, +}; + +static int __init dsa_init_module(void) +{ + dev_add_pack(&dsa_packet_type); + return 0; +} +module_init(dsa_init_module); + +static void __exit dsa_cleanup_module(void) +{ + dev_remove_pack(&dsa_packet_type); +} +module_exit(dsa_cleanup_module); diff --git a/net/dsa/tag_edsa.c b/net/dsa/tag_edsa.c new file mode 100644 index 000000000000..f985ea993843 --- /dev/null +++ b/net/dsa/tag_edsa.c @@ -0,0 +1,213 @@ +/* + * net/dsa/tag_edsa.c - Ethertype DSA tagging + * Copyright (c) 2008 Marvell Semiconductor + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + */ + +#include <linux/etherdevice.h> +#include <linux/list.h> +#include <linux/netdevice.h> +#include "dsa_priv.h" + +#define DSA_HLEN 4 +#define EDSA_HLEN 8 + +int edsa_xmit(struct sk_buff *skb, struct net_device *dev) +{ + struct dsa_slave_priv *p = netdev_priv(dev); + u8 *edsa_header; + + dev->stats.tx_packets++; + dev->stats.tx_bytes += skb->len; + + /* + * Convert the outermost 802.1q tag to a DSA tag and prepend + * a DSA ethertype field is the packet is tagged, or insert + * a DSA ethertype plus DSA tag between the addresses and the + * current ethertype field if the packet is untagged. + */ + if (skb->protocol == htons(ETH_P_8021Q)) { + if (skb_cow_head(skb, DSA_HLEN) < 0) + goto out_free; + skb_push(skb, DSA_HLEN); + + memmove(skb->data, skb->data + DSA_HLEN, 2 * ETH_ALEN); + + /* + * Construct tagged FROM_CPU DSA tag from 802.1q tag. + */ + edsa_header = skb->data + 2 * ETH_ALEN; + edsa_header[0] = (ETH_P_EDSA >> 8) & 0xff; + edsa_header[1] = ETH_P_EDSA & 0xff; + edsa_header[2] = 0x00; + edsa_header[3] = 0x00; + edsa_header[4] = 0x60; + edsa_header[5] = p->port << 3; + + /* + * Move CFI field from byte 6 to byte 5. + */ + if (edsa_header[6] & 0x10) { + edsa_header[5] |= 0x01; + edsa_header[6] &= ~0x10; + } + } else { + if (skb_cow_head(skb, EDSA_HLEN) < 0) + goto out_free; + skb_push(skb, EDSA_HLEN); + + memmove(skb->data, skb->data + EDSA_HLEN, 2 * ETH_ALEN); + + /* + * Construct untagged FROM_CPU DSA tag. + */ + edsa_header = skb->data + 2 * ETH_ALEN; + edsa_header[0] = (ETH_P_EDSA >> 8) & 0xff; + edsa_header[1] = ETH_P_EDSA & 0xff; + edsa_header[2] = 0x00; + edsa_header[3] = 0x00; + edsa_header[4] = 0x40; + edsa_header[5] = p->port << 3; + edsa_header[6] = 0x00; + edsa_header[7] = 0x00; + } + + skb->protocol = htons(ETH_P_EDSA); + + skb->dev = p->parent->master_netdev; + dev_queue_xmit(skb); + + return NETDEV_TX_OK; + +out_free: + kfree_skb(skb); + return NETDEV_TX_OK; +} + +static int edsa_rcv(struct sk_buff *skb, struct net_device *dev, + struct packet_type *pt, struct net_device *orig_dev) +{ + struct dsa_switch *ds = dev->dsa_ptr; + u8 *edsa_header; + int source_port; + + if (unlikely(ds == NULL)) + goto out_drop; + + skb = skb_unshare(skb, GFP_ATOMIC); + if (skb == NULL) + goto out; + + if (unlikely(!pskb_may_pull(skb, EDSA_HLEN))) + goto out_drop; + + /* + * Skip the two null bytes after the ethertype. + */ + edsa_header = skb->data + 2; + + /* + * Check that frame type is either TO_CPU or FORWARD, and + * that the source device is zero. + */ + if ((edsa_header[0] & 0xdf) != 0x00 && (edsa_header[0] & 0xdf) != 0xc0) + goto out_drop; + + /* + * Check that the source port is a registered DSA port. + */ + source_port = (edsa_header[1] >> 3) & 0x1f; + if (source_port >= DSA_MAX_PORTS || ds->ports[source_port] == NULL) + goto out_drop; + + /* + * If the 'tagged' bit is set, convert the DSA tag to a 802.1q + * tag and delete the ethertype part. If the 'tagged' bit is + * clear, delete the ethertype and the DSA tag parts. + */ + if (edsa_header[0] & 0x20) { + u8 new_header[4]; + + /* + * Insert 802.1q ethertype and copy the VLAN-related + * fields, but clear the bit that will hold CFI (since + * DSA uses that bit location for another purpose). + */ + new_header[0] = (ETH_P_8021Q >> 8) & 0xff; + new_header[1] = ETH_P_8021Q & 0xff; + new_header[2] = edsa_header[2] & ~0x10; + new_header[3] = edsa_header[3]; + + /* + * Move CFI bit from its place in the DSA header to + * its 802.1q-designated place. + */ + if (edsa_header[1] & 0x01) + new_header[2] |= 0x10; + + skb_pull_rcsum(skb, DSA_HLEN); + + /* + * Update packet checksum if skb is CHECKSUM_COMPLETE. + */ + if (skb->ip_summed == CHECKSUM_COMPLETE) { + __wsum c = skb->csum; + c = csum_add(c, csum_partial(new_header + 2, 2, 0)); + c = csum_sub(c, csum_partial(edsa_header + 2, 2, 0)); + skb->csum = c; + } + + memcpy(edsa_header, new_header, DSA_HLEN); + + memmove(skb->data - ETH_HLEN, + skb->data - ETH_HLEN - DSA_HLEN, + 2 * ETH_ALEN); + } else { + /* + * Remove DSA tag and update checksum. + */ + skb_pull_rcsum(skb, EDSA_HLEN); + memmove(skb->data - ETH_HLEN, + skb->data - ETH_HLEN - EDSA_HLEN, + 2 * ETH_ALEN); + } + + skb->dev = ds->ports[source_port]; + skb_push(skb, ETH_HLEN); + skb->protocol = eth_type_trans(skb, skb->dev); + + skb->dev->last_rx = jiffies; + skb->dev->stats.rx_packets++; + skb->dev->stats.rx_bytes += skb->len; + + netif_receive_skb(skb); + + return 0; + +out_drop: + kfree_skb(skb); +out: + return 0; +} + +static struct packet_type edsa_packet_type = { + .type = __constant_htons(ETH_P_EDSA), + .func = edsa_rcv, +}; + +static int __init edsa_init_module(void) +{ + dev_add_pack(&edsa_packet_type); + return 0; +} +module_init(edsa_init_module); + +static void __exit edsa_cleanup_module(void) +{ + dev_remove_pack(&edsa_packet_type); +} +module_exit(edsa_cleanup_module); diff --git a/net/dsa/tag_trailer.c b/net/dsa/tag_trailer.c new file mode 100644 index 000000000000..d3117764b2c2 --- /dev/null +++ b/net/dsa/tag_trailer.c @@ -0,0 +1,130 @@ +/* + * net/dsa/tag_trailer.c - Trailer tag format handling + * Copyright (c) 2008 Marvell Semiconductor + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + */ + +#include <linux/etherdevice.h> +#include <linux/list.h> +#include <linux/netdevice.h> +#include "dsa_priv.h" + +int trailer_xmit(struct sk_buff *skb, struct net_device *dev) +{ + struct dsa_slave_priv *p = netdev_priv(dev); + struct sk_buff *nskb; + int padlen; + u8 *trailer; + + dev->stats.tx_packets++; + dev->stats.tx_bytes += skb->len; + + /* + * We have to make sure that the trailer ends up as the very + * last 4 bytes of the packet. This means that we have to pad + * the packet to the minimum ethernet frame size, if necessary, + * before adding the trailer. + */ + padlen = 0; + if (skb->len < 60) + padlen = 60 - skb->len; + + nskb = alloc_skb(NET_IP_ALIGN + skb->len + padlen + 4, GFP_ATOMIC); + if (nskb == NULL) { + kfree_skb(skb); + return NETDEV_TX_OK; + } + skb_reserve(nskb, NET_IP_ALIGN); + + skb_reset_mac_header(nskb); + skb_set_network_header(nskb, skb_network_header(skb) - skb->head); + skb_set_transport_header(nskb, skb_transport_header(skb) - skb->head); + skb_copy_and_csum_dev(skb, skb_put(nskb, skb->len)); + kfree_skb(skb); + + if (padlen) { + u8 *pad = skb_put(nskb, padlen); + memset(pad, 0, padlen); + } + + trailer = skb_put(nskb, 4); + trailer[0] = 0x80; + trailer[1] = 1 << p->port; + trailer[2] = 0x10; + trailer[3] = 0x00; + + nskb->protocol = htons(ETH_P_TRAILER); + + nskb->dev = p->parent->master_netdev; + dev_queue_xmit(nskb); + + return NETDEV_TX_OK; +} + +static int trailer_rcv(struct sk_buff *skb, struct net_device *dev, + struct packet_type *pt, struct net_device *orig_dev) +{ + struct dsa_switch *ds = dev->dsa_ptr; + u8 *trailer; + int source_port; + + if (unlikely(ds == NULL)) + goto out_drop; + + skb = skb_unshare(skb, GFP_ATOMIC); + if (skb == NULL) + goto out; + + if (skb_linearize(skb)) + goto out_drop; + + trailer = skb_tail_pointer(skb) - 4; + if (trailer[0] != 0x80 || (trailer[1] & 0xf8) != 0x00 || + (trailer[3] & 0xef) != 0x00 || trailer[3] != 0x00) + goto out_drop; + + source_port = trailer[1] & 7; + if (source_port >= DSA_MAX_PORTS || ds->ports[source_port] == NULL) + goto out_drop; + + pskb_trim_rcsum(skb, skb->len - 4); + + skb->dev = ds->ports[source_port]; + skb_push(skb, ETH_HLEN); + skb->protocol = eth_type_trans(skb, skb->dev); + + skb->dev->last_rx = jiffies; + skb->dev->stats.rx_packets++; + skb->dev->stats.rx_bytes += skb->len; + + netif_receive_skb(skb); + + return 0; + +out_drop: + kfree_skb(skb); +out: + return 0; +} + +static struct packet_type trailer_packet_type = { + .type = __constant_htons(ETH_P_TRAILER), + .func = trailer_rcv, +}; + +static int __init trailer_init_module(void) +{ + dev_add_pack(&trailer_packet_type); + return 0; +} +module_init(trailer_init_module); + +static void __exit trailer_cleanup_module(void) +{ + dev_remove_pack(&trailer_packet_type); +} +module_exit(trailer_cleanup_module); diff --git a/net/ethernet/eth.c b/net/ethernet/eth.c index a80839b02e3f..b9d85af2dd31 100644 --- a/net/ethernet/eth.c +++ b/net/ethernet/eth.c @@ -57,6 +57,7 @@ #include <net/sock.h> #include <net/ipv6.h> #include <net/ip.h> +#include <net/dsa.h> #include <asm/uaccess.h> #include <asm/system.h> @@ -129,7 +130,7 @@ int eth_rebuild_header(struct sk_buff *skb) switch (eth->h_proto) { #ifdef CONFIG_INET - case __constant_htons(ETH_P_IP): + case htons(ETH_P_IP): return arp_find(eth->h_dest, skb); #endif default: @@ -184,6 +185,17 @@ __be16 eth_type_trans(struct sk_buff *skb, struct net_device *dev) skb->pkt_type = PACKET_OTHERHOST; } + /* + * Some variants of DSA tagging don't have an ethertype field + * at all, so we check here whether one of those tagging + * variants has been configured on the receiving interface, + * and if so, set skb->protocol without looking at the packet. + */ + if (netdev_uses_dsa_tags(dev)) + return htons(ETH_P_DSA); + if (netdev_uses_trailer_tags(dev)) + return htons(ETH_P_TRAILER); + if (ntohs(eth->h_proto) >= 1536) return eth->h_proto; diff --git a/net/ieee80211/ieee80211_module.c b/net/ieee80211/ieee80211_module.c index 3bca97f55d47..949772a5a7dc 100644 --- a/net/ieee80211/ieee80211_module.c +++ b/net/ieee80211/ieee80211_module.c @@ -157,7 +157,7 @@ struct net_device *alloc_ieee80211(int sizeof_priv) err = ieee80211_networks_allocate(ieee); if (err) { IEEE80211_ERROR("Unable to allocate beacon storage: %d\n", err); - goto failed; + goto failed_free_netdev; } ieee80211_networks_initialize(ieee); @@ -193,9 +193,9 @@ struct net_device *alloc_ieee80211(int sizeof_priv) return dev; - failed: - if (dev) - free_netdev(dev); +failed_free_netdev: + free_netdev(dev); +failed: return NULL; } diff --git a/net/ipv4/Kconfig b/net/ipv4/Kconfig index 591ea23639ca..691268f3a359 100644 --- a/net/ipv4/Kconfig +++ b/net/ipv4/Kconfig @@ -630,5 +630,3 @@ config TCP_MD5SIG If unsure, say N. -source "net/ipv4/ipvs/Kconfig" - diff --git a/net/ipv4/Makefile b/net/ipv4/Makefile index ad40ef3f9ebc..80ff87ce43aa 100644 --- a/net/ipv4/Makefile +++ b/net/ipv4/Makefile @@ -33,7 +33,6 @@ obj-$(CONFIG_INET_XFRM_MODE_TRANSPORT) += xfrm4_mode_transport.o obj-$(CONFIG_INET_XFRM_MODE_TUNNEL) += xfrm4_mode_tunnel.o obj-$(CONFIG_IP_PNP) += ipconfig.o obj-$(CONFIG_NETFILTER) += netfilter.o netfilter/ -obj-$(CONFIG_IP_VS) += ipvs/ obj-$(CONFIG_INET_DIAG) += inet_diag.o obj-$(CONFIG_INET_TCP_DIAG) += tcp_diag.o obj-$(CONFIG_NET_TCPPROBE) += tcp_probe.o diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c index 8a3ac1fa71a9..1fbff5fa4241 100644 --- a/net/ipv4/af_inet.c +++ b/net/ipv4/af_inet.c @@ -469,7 +469,7 @@ int inet_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len) */ err = -EADDRNOTAVAIL; if (!sysctl_ip_nonlocal_bind && - !inet->freebind && + !(inet->freebind || inet->transparent) && addr->sin_addr.s_addr != htonl(INADDR_ANY) && chk_addr_ret != RTN_LOCAL && chk_addr_ret != RTN_MULTICAST && diff --git a/net/ipv4/cipso_ipv4.c b/net/ipv4/cipso_ipv4.c index 2c0e4572cc90..490e035c6d90 100644 --- a/net/ipv4/cipso_ipv4.c +++ b/net/ipv4/cipso_ipv4.c @@ -13,7 +13,7 @@ */ /* - * (c) Copyright Hewlett-Packard Development Company, L.P., 2006 + * (c) Copyright Hewlett-Packard Development Company, L.P., 2006, 2008 * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by @@ -47,17 +47,7 @@ #include <asm/bug.h> #include <asm/unaligned.h> -struct cipso_v4_domhsh_entry { - char *domain; - u32 valid; - struct list_head list; - struct rcu_head rcu; -}; - /* List of available DOI definitions */ -/* XXX - Updates should be minimal so having a single lock for the - * cipso_v4_doi_list and the cipso_v4_doi_list->dom_list should be - * okay. */ /* XXX - This currently assumes a minimal number of different DOIs in use, * if in practice there are a lot of different DOIs this list should * probably be turned into a hash table or something similar so we @@ -119,6 +109,19 @@ int cipso_v4_rbm_strictvalid = 1; * be omitted. */ #define CIPSO_V4_TAG_RNG_CAT_MAX 8 +/* Base length of the local tag (non-standard tag). + * Tag definition (may change between kernel versions) + * + * 0 8 16 24 32 + * +----------+----------+----------+----------+ + * | 10000000 | 00000110 | 32-bit secid value | + * +----------+----------+----------+----------+ + * | in (host byte order)| + * +----------+----------+ + * + */ +#define CIPSO_V4_TAG_LOC_BLEN 6 + /* * Helper Functions */ @@ -194,25 +197,6 @@ static void cipso_v4_bitmap_setbit(unsigned char *bitmap, } /** - * cipso_v4_doi_domhsh_free - Frees a domain list entry - * @entry: the entry's RCU field - * - * Description: - * This function is designed to be used as a callback to the call_rcu() - * function so that the memory allocated to a domain list entry can be released - * safely. - * - */ -static void cipso_v4_doi_domhsh_free(struct rcu_head *entry) -{ - struct cipso_v4_domhsh_entry *ptr; - - ptr = container_of(entry, struct cipso_v4_domhsh_entry, rcu); - kfree(ptr->domain); - kfree(ptr); -} - -/** * cipso_v4_cache_entry_free - Frees a cache entry * @entry: the entry to free * @@ -457,7 +441,7 @@ static struct cipso_v4_doi *cipso_v4_doi_search(u32 doi) struct cipso_v4_doi *iter; list_for_each_entry_rcu(iter, &cipso_v4_doi_list, list) - if (iter->doi == doi && iter->valid) + if (iter->doi == doi && atomic_read(&iter->refcount)) return iter; return NULL; } @@ -496,14 +480,17 @@ int cipso_v4_doi_add(struct cipso_v4_doi *doi_def) if (doi_def->type != CIPSO_V4_MAP_PASS) return -EINVAL; break; + case CIPSO_V4_TAG_LOCAL: + if (doi_def->type != CIPSO_V4_MAP_LOCAL) + return -EINVAL; + break; default: return -EINVAL; } } - doi_def->valid = 1; + atomic_set(&doi_def->refcount, 1); INIT_RCU_HEAD(&doi_def->rcu); - INIT_LIST_HEAD(&doi_def->dom_list); spin_lock(&cipso_v4_doi_list_lock); if (cipso_v4_doi_search(doi_def->doi) != NULL) @@ -519,59 +506,129 @@ doi_add_failure: } /** + * cipso_v4_doi_free - Frees a DOI definition + * @entry: the entry's RCU field + * + * Description: + * This function frees all of the memory associated with a DOI definition. + * + */ +void cipso_v4_doi_free(struct cipso_v4_doi *doi_def) +{ + if (doi_def == NULL) + return; + + switch (doi_def->type) { + case CIPSO_V4_MAP_TRANS: + kfree(doi_def->map.std->lvl.cipso); + kfree(doi_def->map.std->lvl.local); + kfree(doi_def->map.std->cat.cipso); + kfree(doi_def->map.std->cat.local); + break; + } + kfree(doi_def); +} + +/** + * cipso_v4_doi_free_rcu - Frees a DOI definition via the RCU pointer + * @entry: the entry's RCU field + * + * Description: + * This function is designed to be used as a callback to the call_rcu() + * function so that the memory allocated to the DOI definition can be released + * safely. + * + */ +static void cipso_v4_doi_free_rcu(struct rcu_head *entry) +{ + struct cipso_v4_doi *doi_def; + + doi_def = container_of(entry, struct cipso_v4_doi, rcu); + cipso_v4_doi_free(doi_def); +} + +/** * cipso_v4_doi_remove - Remove an existing DOI from the CIPSO protocol engine * @doi: the DOI value * @audit_secid: the LSM secid to use in the audit message - * @callback: the DOI cleanup/free callback * * Description: - * Removes a DOI definition from the CIPSO engine, @callback is called to - * free any memory. The NetLabel routines will be called to release their own - * LSM domain mappings as well as our own domain list. Returns zero on - * success and negative values on failure. + * Removes a DOI definition from the CIPSO engine. The NetLabel routines will + * be called to release their own LSM domain mappings as well as our own + * domain list. Returns zero on success and negative values on failure. * */ -int cipso_v4_doi_remove(u32 doi, - struct netlbl_audit *audit_info, - void (*callback) (struct rcu_head * head)) +int cipso_v4_doi_remove(u32 doi, struct netlbl_audit *audit_info) { struct cipso_v4_doi *doi_def; - struct cipso_v4_domhsh_entry *dom_iter; spin_lock(&cipso_v4_doi_list_lock); doi_def = cipso_v4_doi_search(doi); - if (doi_def != NULL) { - doi_def->valid = 0; - list_del_rcu(&doi_def->list); + if (doi_def == NULL) { spin_unlock(&cipso_v4_doi_list_lock); - rcu_read_lock(); - list_for_each_entry_rcu(dom_iter, &doi_def->dom_list, list) - if (dom_iter->valid) - netlbl_cfg_map_del(dom_iter->domain, - audit_info); - rcu_read_unlock(); - cipso_v4_cache_invalidate(); - call_rcu(&doi_def->rcu, callback); - return 0; + return -ENOENT; + } + if (!atomic_dec_and_test(&doi_def->refcount)) { + spin_unlock(&cipso_v4_doi_list_lock); + return -EBUSY; } + list_del_rcu(&doi_def->list); spin_unlock(&cipso_v4_doi_list_lock); - return -ENOENT; + cipso_v4_cache_invalidate(); + call_rcu(&doi_def->rcu, cipso_v4_doi_free_rcu); + + return 0; } /** - * cipso_v4_doi_getdef - Returns a pointer to a valid DOI definition + * cipso_v4_doi_getdef - Returns a reference to a valid DOI definition * @doi: the DOI value * * Description: * Searches for a valid DOI definition and if one is found it is returned to * the caller. Otherwise NULL is returned. The caller must ensure that - * rcu_read_lock() is held while accessing the returned definition. + * rcu_read_lock() is held while accessing the returned definition and the DOI + * definition reference count is decremented when the caller is done. * */ struct cipso_v4_doi *cipso_v4_doi_getdef(u32 doi) { - return cipso_v4_doi_search(doi); + struct cipso_v4_doi *doi_def; + + rcu_read_lock(); + doi_def = cipso_v4_doi_search(doi); + if (doi_def == NULL) + goto doi_getdef_return; + if (!atomic_inc_not_zero(&doi_def->refcount)) + doi_def = NULL; + +doi_getdef_return: + rcu_read_unlock(); + return doi_def; +} + +/** + * cipso_v4_doi_putdef - Releases a reference for the given DOI definition + * @doi_def: the DOI definition + * + * Description: + * Releases a DOI definition reference obtained from cipso_v4_doi_getdef(). + * + */ +void cipso_v4_doi_putdef(struct cipso_v4_doi *doi_def) +{ + if (doi_def == NULL) + return; + + if (!atomic_dec_and_test(&doi_def->refcount)) + return; + spin_lock(&cipso_v4_doi_list_lock); + list_del_rcu(&doi_def->list); + spin_unlock(&cipso_v4_doi_list_lock); + + cipso_v4_cache_invalidate(); + call_rcu(&doi_def->rcu, cipso_v4_doi_free_rcu); } /** @@ -597,7 +654,7 @@ int cipso_v4_doi_walk(u32 *skip_cnt, rcu_read_lock(); list_for_each_entry_rcu(iter_doi, &cipso_v4_doi_list, list) - if (iter_doi->valid) { + if (atomic_read(&iter_doi->refcount) > 0) { if (doi_cnt++ < *skip_cnt) continue; ret_val = callback(iter_doi, cb_arg); @@ -613,85 +670,6 @@ doi_walk_return: return ret_val; } -/** - * cipso_v4_doi_domhsh_add - Adds a domain entry to a DOI definition - * @doi_def: the DOI definition - * @domain: the domain to add - * - * Description: - * Adds the @domain to the DOI specified by @doi_def, this function - * should only be called by external functions (i.e. NetLabel). This function - * does allocate memory. Returns zero on success, negative values on failure. - * - */ -int cipso_v4_doi_domhsh_add(struct cipso_v4_doi *doi_def, const char *domain) -{ - struct cipso_v4_domhsh_entry *iter; - struct cipso_v4_domhsh_entry *new_dom; - - new_dom = kzalloc(sizeof(*new_dom), GFP_KERNEL); - if (new_dom == NULL) - return -ENOMEM; - if (domain) { - new_dom->domain = kstrdup(domain, GFP_KERNEL); - if (new_dom->domain == NULL) { - kfree(new_dom); - return -ENOMEM; - } - } - new_dom->valid = 1; - INIT_RCU_HEAD(&new_dom->rcu); - - spin_lock(&cipso_v4_doi_list_lock); - list_for_each_entry(iter, &doi_def->dom_list, list) - if (iter->valid && - ((domain != NULL && iter->domain != NULL && - strcmp(iter->domain, domain) == 0) || - (domain == NULL && iter->domain == NULL))) { - spin_unlock(&cipso_v4_doi_list_lock); - kfree(new_dom->domain); - kfree(new_dom); - return -EEXIST; - } - list_add_tail_rcu(&new_dom->list, &doi_def->dom_list); - spin_unlock(&cipso_v4_doi_list_lock); - - return 0; -} - -/** - * cipso_v4_doi_domhsh_remove - Removes a domain entry from a DOI definition - * @doi_def: the DOI definition - * @domain: the domain to remove - * - * Description: - * Removes the @domain from the DOI specified by @doi_def, this function - * should only be called by external functions (i.e. NetLabel). Returns zero - * on success and negative values on error. - * - */ -int cipso_v4_doi_domhsh_remove(struct cipso_v4_doi *doi_def, - const char *domain) -{ - struct cipso_v4_domhsh_entry *iter; - - spin_lock(&cipso_v4_doi_list_lock); - list_for_each_entry(iter, &doi_def->dom_list, list) - if (iter->valid && - ((domain != NULL && iter->domain != NULL && - strcmp(iter->domain, domain) == 0) || - (domain == NULL && iter->domain == NULL))) { - iter->valid = 0; - list_del_rcu(&iter->list); - spin_unlock(&cipso_v4_doi_list_lock); - call_rcu(&iter->rcu, cipso_v4_doi_domhsh_free); - return 0; - } - spin_unlock(&cipso_v4_doi_list_lock); - - return -ENOENT; -} - /* * Label Mapping Functions */ @@ -712,7 +690,7 @@ static int cipso_v4_map_lvl_valid(const struct cipso_v4_doi *doi_def, u8 level) switch (doi_def->type) { case CIPSO_V4_MAP_PASS: return 0; - case CIPSO_V4_MAP_STD: + case CIPSO_V4_MAP_TRANS: if (doi_def->map.std->lvl.cipso[level] < CIPSO_V4_INV_LVL) return 0; break; @@ -741,7 +719,7 @@ static int cipso_v4_map_lvl_hton(const struct cipso_v4_doi *doi_def, case CIPSO_V4_MAP_PASS: *net_lvl = host_lvl; return 0; - case CIPSO_V4_MAP_STD: + case CIPSO_V4_MAP_TRANS: if (host_lvl < doi_def->map.std->lvl.local_size && doi_def->map.std->lvl.local[host_lvl] < CIPSO_V4_INV_LVL) { *net_lvl = doi_def->map.std->lvl.local[host_lvl]; @@ -775,7 +753,7 @@ static int cipso_v4_map_lvl_ntoh(const struct cipso_v4_doi *doi_def, case CIPSO_V4_MAP_PASS: *host_lvl = net_lvl; return 0; - case CIPSO_V4_MAP_STD: + case CIPSO_V4_MAP_TRANS: map_tbl = doi_def->map.std; if (net_lvl < map_tbl->lvl.cipso_size && map_tbl->lvl.cipso[net_lvl] < CIPSO_V4_INV_LVL) { @@ -812,7 +790,7 @@ static int cipso_v4_map_cat_rbm_valid(const struct cipso_v4_doi *doi_def, switch (doi_def->type) { case CIPSO_V4_MAP_PASS: return 0; - case CIPSO_V4_MAP_STD: + case CIPSO_V4_MAP_TRANS: cipso_cat_size = doi_def->map.std->cat.cipso_size; cipso_array = doi_def->map.std->cat.cipso; for (;;) { @@ -860,7 +838,7 @@ static int cipso_v4_map_cat_rbm_hton(const struct cipso_v4_doi *doi_def, u32 host_cat_size = 0; u32 *host_cat_array = NULL; - if (doi_def->type == CIPSO_V4_MAP_STD) { + if (doi_def->type == CIPSO_V4_MAP_TRANS) { host_cat_size = doi_def->map.std->cat.local_size; host_cat_array = doi_def->map.std->cat.local; } @@ -875,7 +853,7 @@ static int cipso_v4_map_cat_rbm_hton(const struct cipso_v4_doi *doi_def, case CIPSO_V4_MAP_PASS: net_spot = host_spot; break; - case CIPSO_V4_MAP_STD: + case CIPSO_V4_MAP_TRANS: if (host_spot >= host_cat_size) return -EPERM; net_spot = host_cat_array[host_spot]; @@ -921,7 +899,7 @@ static int cipso_v4_map_cat_rbm_ntoh(const struct cipso_v4_doi *doi_def, u32 net_cat_size = 0; u32 *net_cat_array = NULL; - if (doi_def->type == CIPSO_V4_MAP_STD) { + if (doi_def->type == CIPSO_V4_MAP_TRANS) { net_cat_size = doi_def->map.std->cat.cipso_size; net_cat_array = doi_def->map.std->cat.cipso; } @@ -941,7 +919,7 @@ static int cipso_v4_map_cat_rbm_ntoh(const struct cipso_v4_doi *doi_def, case CIPSO_V4_MAP_PASS: host_spot = net_spot; break; - case CIPSO_V4_MAP_STD: + case CIPSO_V4_MAP_TRANS: if (net_spot >= net_cat_size) return -EPERM; host_spot = net_cat_array[net_spot]; @@ -1277,7 +1255,7 @@ static int cipso_v4_gentag_rbm(const struct cipso_v4_doi *doi_def, } else tag_len = 4; - buffer[0] = 0x01; + buffer[0] = CIPSO_V4_TAG_RBITMAP; buffer[1] = tag_len; buffer[3] = level; @@ -1373,7 +1351,7 @@ static int cipso_v4_gentag_enum(const struct cipso_v4_doi *doi_def, } else tag_len = 4; - buffer[0] = 0x02; + buffer[0] = CIPSO_V4_TAG_ENUM; buffer[1] = tag_len; buffer[3] = level; @@ -1469,7 +1447,7 @@ static int cipso_v4_gentag_rng(const struct cipso_v4_doi *doi_def, } else tag_len = 4; - buffer[0] = 0x05; + buffer[0] = CIPSO_V4_TAG_RANGE; buffer[1] = tag_len; buffer[3] = level; @@ -1523,6 +1501,54 @@ static int cipso_v4_parsetag_rng(const struct cipso_v4_doi *doi_def, } /** + * cipso_v4_gentag_loc - Generate a CIPSO local tag (non-standard) + * @doi_def: the DOI definition + * @secattr: the security attributes + * @buffer: the option buffer + * @buffer_len: length of buffer in bytes + * + * Description: + * Generate a CIPSO option using the local tag. Returns the size of the tag + * on success, negative values on failure. + * + */ +static int cipso_v4_gentag_loc(const struct cipso_v4_doi *doi_def, + const struct netlbl_lsm_secattr *secattr, + unsigned char *buffer, + u32 buffer_len) +{ + if (!(secattr->flags & NETLBL_SECATTR_SECID)) + return -EPERM; + + buffer[0] = CIPSO_V4_TAG_LOCAL; + buffer[1] = CIPSO_V4_TAG_LOC_BLEN; + *(u32 *)&buffer[2] = secattr->attr.secid; + + return CIPSO_V4_TAG_LOC_BLEN; +} + +/** + * cipso_v4_parsetag_loc - Parse a CIPSO local tag + * @doi_def: the DOI definition + * @tag: the CIPSO tag + * @secattr: the security attributes + * + * Description: + * Parse a CIPSO local tag and return the security attributes in @secattr. + * Return zero on success, negatives values on failure. + * + */ +static int cipso_v4_parsetag_loc(const struct cipso_v4_doi *doi_def, + const unsigned char *tag, + struct netlbl_lsm_secattr *secattr) +{ + secattr->attr.secid = *(u32 *)&tag[2]; + secattr->flags |= NETLBL_SECATTR_SECID; + + return 0; +} + +/** * cipso_v4_validate - Validate a CIPSO option * @option: the start of the option, on error it is set to point to the error * @@ -1541,7 +1567,7 @@ static int cipso_v4_parsetag_rng(const struct cipso_v4_doi *doi_def, * that is unrecognized." * */ -int cipso_v4_validate(unsigned char **option) +int cipso_v4_validate(const struct sk_buff *skb, unsigned char **option) { unsigned char *opt = *option; unsigned char *tag; @@ -1566,7 +1592,7 @@ int cipso_v4_validate(unsigned char **option) goto validate_return_locked; } - opt_iter = 6; + opt_iter = CIPSO_V4_HDR_LEN; tag = opt + opt_iter; while (opt_iter < opt_len) { for (tag_iter = 0; doi_def->tags[tag_iter] != tag[0];) @@ -1584,7 +1610,7 @@ int cipso_v4_validate(unsigned char **option) switch (tag[0]) { case CIPSO_V4_TAG_RBITMAP: - if (tag_len < 4) { + if (tag_len < CIPSO_V4_TAG_RBM_BLEN) { err_offset = opt_iter + 1; goto validate_return_locked; } @@ -1602,7 +1628,7 @@ int cipso_v4_validate(unsigned char **option) err_offset = opt_iter + 3; goto validate_return_locked; } - if (tag_len > 4 && + if (tag_len > CIPSO_V4_TAG_RBM_BLEN && cipso_v4_map_cat_rbm_valid(doi_def, &tag[4], tag_len - 4) < 0) { @@ -1612,7 +1638,7 @@ int cipso_v4_validate(unsigned char **option) } break; case CIPSO_V4_TAG_ENUM: - if (tag_len < 4) { + if (tag_len < CIPSO_V4_TAG_ENUM_BLEN) { err_offset = opt_iter + 1; goto validate_return_locked; } @@ -1622,7 +1648,7 @@ int cipso_v4_validate(unsigned char **option) err_offset = opt_iter + 3; goto validate_return_locked; } - if (tag_len > 4 && + if (tag_len > CIPSO_V4_TAG_ENUM_BLEN && cipso_v4_map_cat_enum_valid(doi_def, &tag[4], tag_len - 4) < 0) { @@ -1631,7 +1657,7 @@ int cipso_v4_validate(unsigned char **option) } break; case CIPSO_V4_TAG_RANGE: - if (tag_len < 4) { + if (tag_len < CIPSO_V4_TAG_RNG_BLEN) { err_offset = opt_iter + 1; goto validate_return_locked; } @@ -1641,7 +1667,7 @@ int cipso_v4_validate(unsigned char **option) err_offset = opt_iter + 3; goto validate_return_locked; } - if (tag_len > 4 && + if (tag_len > CIPSO_V4_TAG_RNG_BLEN && cipso_v4_map_cat_rng_valid(doi_def, &tag[4], tag_len - 4) < 0) { @@ -1649,6 +1675,19 @@ int cipso_v4_validate(unsigned char **option) goto validate_return_locked; } break; + case CIPSO_V4_TAG_LOCAL: + /* This is a non-standard tag that we only allow for + * local connections, so if the incoming interface is + * not the loopback device drop the packet. */ + if (!(skb->dev->flags & IFF_LOOPBACK)) { + err_offset = opt_iter; + goto validate_return_locked; + } + if (tag_len != CIPSO_V4_TAG_LOC_BLEN) { + err_offset = opt_iter + 1; + goto validate_return_locked; + } + break; default: err_offset = opt_iter; goto validate_return_locked; @@ -1704,48 +1743,27 @@ void cipso_v4_error(struct sk_buff *skb, int error, u32 gateway) } /** - * cipso_v4_sock_setattr - Add a CIPSO option to a socket - * @sk: the socket + * cipso_v4_genopt - Generate a CIPSO option + * @buf: the option buffer + * @buf_len: the size of opt_buf * @doi_def: the CIPSO DOI to use - * @secattr: the specific security attributes of the socket + * @secattr: the security attributes * * Description: - * Set the CIPSO option on the given socket using the DOI definition and - * security attributes passed to the function. This function requires - * exclusive access to @sk, which means it either needs to be in the - * process of being created or locked. Returns zero on success and negative - * values on failure. + * Generate a CIPSO option using the DOI definition and security attributes + * passed to the function. Returns the length of the option on success and + * negative values on failure. * */ -int cipso_v4_sock_setattr(struct sock *sk, - const struct cipso_v4_doi *doi_def, - const struct netlbl_lsm_secattr *secattr) +static int cipso_v4_genopt(unsigned char *buf, u32 buf_len, + const struct cipso_v4_doi *doi_def, + const struct netlbl_lsm_secattr *secattr) { - int ret_val = -EPERM; + int ret_val; u32 iter; - unsigned char *buf; - u32 buf_len = 0; - u32 opt_len; - struct ip_options *opt = NULL; - struct inet_sock *sk_inet; - struct inet_connection_sock *sk_conn; - /* In the case of sock_create_lite(), the sock->sk field is not - * defined yet but it is not a problem as the only users of these - * "lite" PF_INET sockets are functions which do an accept() call - * afterwards so we will label the socket as part of the accept(). */ - if (sk == NULL) - return 0; - - /* We allocate the maximum CIPSO option size here so we are probably - * being a little wasteful, but it makes our life _much_ easier later - * on and after all we are only talking about 40 bytes. */ - buf_len = CIPSO_V4_OPT_LEN_MAX; - buf = kmalloc(buf_len, GFP_ATOMIC); - if (buf == NULL) { - ret_val = -ENOMEM; - goto socket_setattr_failure; - } + if (buf_len <= CIPSO_V4_HDR_LEN) + return -ENOSPC; /* XXX - This code assumes only one tag per CIPSO option which isn't * really a good assumption to make but since we only support the MAC @@ -1772,9 +1790,14 @@ int cipso_v4_sock_setattr(struct sock *sk, &buf[CIPSO_V4_HDR_LEN], buf_len - CIPSO_V4_HDR_LEN); break; + case CIPSO_V4_TAG_LOCAL: + ret_val = cipso_v4_gentag_loc(doi_def, + secattr, + &buf[CIPSO_V4_HDR_LEN], + buf_len - CIPSO_V4_HDR_LEN); + break; default: - ret_val = -EPERM; - goto socket_setattr_failure; + return -EPERM; } iter++; @@ -1782,9 +1805,58 @@ int cipso_v4_sock_setattr(struct sock *sk, iter < CIPSO_V4_TAG_MAXCNT && doi_def->tags[iter] != CIPSO_V4_TAG_INVALID); if (ret_val < 0) - goto socket_setattr_failure; + return ret_val; cipso_v4_gentag_hdr(doi_def, buf, ret_val); - buf_len = CIPSO_V4_HDR_LEN + ret_val; + return CIPSO_V4_HDR_LEN + ret_val; +} + +/** + * cipso_v4_sock_setattr - Add a CIPSO option to a socket + * @sk: the socket + * @doi_def: the CIPSO DOI to use + * @secattr: the specific security attributes of the socket + * + * Description: + * Set the CIPSO option on the given socket using the DOI definition and + * security attributes passed to the function. This function requires + * exclusive access to @sk, which means it either needs to be in the + * process of being created or locked. Returns zero on success and negative + * values on failure. + * + */ +int cipso_v4_sock_setattr(struct sock *sk, + const struct cipso_v4_doi *doi_def, + const struct netlbl_lsm_secattr *secattr) +{ + int ret_val = -EPERM; + unsigned char *buf = NULL; + u32 buf_len; + u32 opt_len; + struct ip_options *opt = NULL; + struct inet_sock *sk_inet; + struct inet_connection_sock *sk_conn; + + /* In the case of sock_create_lite(), the sock->sk field is not + * defined yet but it is not a problem as the only users of these + * "lite" PF_INET sockets are functions which do an accept() call + * afterwards so we will label the socket as part of the accept(). */ + if (sk == NULL) + return 0; + + /* We allocate the maximum CIPSO option size here so we are probably + * being a little wasteful, but it makes our life _much_ easier later + * on and after all we are only talking about 40 bytes. */ + buf_len = CIPSO_V4_OPT_LEN_MAX; + buf = kmalloc(buf_len, GFP_ATOMIC); + if (buf == NULL) { + ret_val = -ENOMEM; + goto socket_setattr_failure; + } + + ret_val = cipso_v4_genopt(buf, buf_len, doi_def, secattr); + if (ret_val < 0) + goto socket_setattr_failure; + buf_len = ret_val; /* We can't use ip_options_get() directly because it makes a call to * ip_options_get_alloc() which allocates memory with GFP_KERNEL and @@ -1822,6 +1894,80 @@ socket_setattr_failure: } /** + * cipso_v4_sock_delattr - Delete the CIPSO option from a socket + * @sk: the socket + * + * Description: + * Removes the CIPSO option from a socket, if present. + * + */ +void cipso_v4_sock_delattr(struct sock *sk) +{ + u8 hdr_delta; + struct ip_options *opt; + struct inet_sock *sk_inet; + + sk_inet = inet_sk(sk); + opt = sk_inet->opt; + if (opt == NULL || opt->cipso == 0) + return; + + if (opt->srr || opt->rr || opt->ts || opt->router_alert) { + u8 cipso_len; + u8 cipso_off; + unsigned char *cipso_ptr; + int iter; + int optlen_new; + + cipso_off = opt->cipso - sizeof(struct iphdr); + cipso_ptr = &opt->__data[cipso_off]; + cipso_len = cipso_ptr[1]; + + if (opt->srr > opt->cipso) + opt->srr -= cipso_len; + if (opt->rr > opt->cipso) + opt->rr -= cipso_len; + if (opt->ts > opt->cipso) + opt->ts -= cipso_len; + if (opt->router_alert > opt->cipso) + opt->router_alert -= cipso_len; + opt->cipso = 0; + + memmove(cipso_ptr, cipso_ptr + cipso_len, + opt->optlen - cipso_off - cipso_len); + + /* determining the new total option length is tricky because of + * the padding necessary, the only thing i can think to do at + * this point is walk the options one-by-one, skipping the + * padding at the end to determine the actual option size and + * from there we can determine the new total option length */ + iter = 0; + optlen_new = 0; + while (iter < opt->optlen) + if (opt->__data[iter] != IPOPT_NOP) { + iter += opt->__data[iter + 1]; + optlen_new = iter; + } else + iter++; + hdr_delta = opt->optlen; + opt->optlen = (optlen_new + 3) & ~3; + hdr_delta -= opt->optlen; + } else { + /* only the cipso option was present on the socket so we can + * remove the entire option struct */ + sk_inet->opt = NULL; + hdr_delta = opt->optlen; + kfree(opt); + } + + if (sk_inet->is_icsk && hdr_delta > 0) { + struct inet_connection_sock *sk_conn = inet_csk(sk); + sk_conn->icsk_ext_hdr_len -= hdr_delta; + sk_conn->icsk_sync_mss(sk, sk_conn->icsk_pmtu_cookie); + } +} + +/** * cipso_v4_getattr - Helper function for the cipso_v4_*_getattr functions * @cipso: the CIPSO v4 option * @secattr: the security attributes @@ -1859,6 +2005,9 @@ static int cipso_v4_getattr(const unsigned char *cipso, case CIPSO_V4_TAG_RANGE: ret_val = cipso_v4_parsetag_rng(doi_def, &cipso[6], secattr); break; + case CIPSO_V4_TAG_LOCAL: + ret_val = cipso_v4_parsetag_loc(doi_def, &cipso[6], secattr); + break; } if (ret_val == 0) secattr->type = NETLBL_NLTYPE_CIPSOV4; @@ -1893,6 +2042,123 @@ int cipso_v4_sock_getattr(struct sock *sk, struct netlbl_lsm_secattr *secattr) } /** + * cipso_v4_skbuff_setattr - Set the CIPSO option on a packet + * @skb: the packet + * @secattr: the security attributes + * + * Description: + * Set the CIPSO option on the given packet based on the security attributes. + * Returns a pointer to the IP header on success and NULL on failure. + * + */ +int cipso_v4_skbuff_setattr(struct sk_buff *skb, + const struct cipso_v4_doi *doi_def, + const struct netlbl_lsm_secattr *secattr) +{ + int ret_val; + struct iphdr *iph; + struct ip_options *opt = &IPCB(skb)->opt; + unsigned char buf[CIPSO_V4_OPT_LEN_MAX]; + u32 buf_len = CIPSO_V4_OPT_LEN_MAX; + u32 opt_len; + int len_delta; + + buf_len = cipso_v4_genopt(buf, buf_len, doi_def, secattr); + if (buf_len < 0) + return buf_len; + opt_len = (buf_len + 3) & ~3; + + /* we overwrite any existing options to ensure that we have enough + * room for the CIPSO option, the reason is that we _need_ to guarantee + * that the security label is applied to the packet - we do the same + * thing when using the socket options and it hasn't caused a problem, + * if we need to we can always revisit this choice later */ + + len_delta = opt_len - opt->optlen; + /* if we don't ensure enough headroom we could panic on the skb_push() + * call below so make sure we have enough, we are also "mangling" the + * packet so we should probably do a copy-on-write call anyway */ + ret_val = skb_cow(skb, skb_headroom(skb) + len_delta); + if (ret_val < 0) + return ret_val; + + if (len_delta > 0) { + /* we assume that the header + opt->optlen have already been + * "pushed" in ip_options_build() or similar */ + iph = ip_hdr(skb); + skb_push(skb, len_delta); + memmove((char *)iph - len_delta, iph, iph->ihl << 2); + skb_reset_network_header(skb); + iph = ip_hdr(skb); + } else if (len_delta < 0) { + iph = ip_hdr(skb); + memset(iph + 1, IPOPT_NOP, opt->optlen); + } else + iph = ip_hdr(skb); + + if (opt->optlen > 0) + memset(opt, 0, sizeof(*opt)); + opt->optlen = opt_len; + opt->cipso = sizeof(struct iphdr); + opt->is_changed = 1; + + /* we have to do the following because we are being called from a + * netfilter hook which means the packet already has had the header + * fields populated and the checksum calculated - yes this means we + * are doing more work than needed but we do it to keep the core + * stack clean and tidy */ + memcpy(iph + 1, buf, buf_len); + if (opt_len > buf_len) + memset((char *)(iph + 1) + buf_len, 0, opt_len - buf_len); + if (len_delta != 0) { + iph->ihl = 5 + (opt_len >> 2); + iph->tot_len = htons(skb->len); + } + ip_send_check(iph); + + return 0; +} + +/** + * cipso_v4_skbuff_delattr - Delete any CIPSO options from a packet + * @skb: the packet + * + * Description: + * Removes any and all CIPSO options from the given packet. Returns zero on + * success, negative values on failure. + * + */ +int cipso_v4_skbuff_delattr(struct sk_buff *skb) +{ + int ret_val; + struct iphdr *iph; + struct ip_options *opt = &IPCB(skb)->opt; + unsigned char *cipso_ptr; + + if (opt->cipso == 0) + return 0; + + /* since we are changing the packet we should make a copy */ + ret_val = skb_cow(skb, skb_headroom(skb)); + if (ret_val < 0) + return ret_val; + + /* the easiest thing to do is just replace the cipso option with noop + * options since we don't change the size of the packet, although we + * still need to recalculate the checksum */ + + iph = ip_hdr(skb); + cipso_ptr = (unsigned char *)iph + opt->cipso; + memset(cipso_ptr, IPOPT_NOOP, cipso_ptr[1]); + opt->cipso = 0; + opt->is_changed = 1; + + ip_send_check(iph); + + return 0; +} + +/** * cipso_v4_skbuff_getattr - Get the security attributes from the CIPSO option * @skb: the packet * @secattr: the security attributes diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c index b12dae2b0b2d..5154e729cf16 100644 --- a/net/ipv4/devinet.c +++ b/net/ipv4/devinet.c @@ -1283,7 +1283,7 @@ static int devinet_conf_proc(ctl_table *ctl, int write, return ret; } -static int devinet_conf_sysctl(ctl_table *table, int __user *name, int nlen, +static int devinet_conf_sysctl(ctl_table *table, void __user *oldval, size_t __user *oldlenp, void __user *newval, size_t newlen) { @@ -1379,12 +1379,11 @@ int ipv4_doint_and_flush(ctl_table *ctl, int write, return ret; } -int ipv4_doint_and_flush_strategy(ctl_table *table, int __user *name, int nlen, +int ipv4_doint_and_flush_strategy(ctl_table *table, void __user *oldval, size_t __user *oldlenp, void __user *newval, size_t newlen) { - int ret = devinet_conf_sysctl(table, name, nlen, oldval, oldlenp, - newval, newlen); + int ret = devinet_conf_sysctl(table, oldval, oldlenp, newval, newlen); struct net *net = table->extra2; if (ret == 1) diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c index 55c355e63234..72b2de76f1cd 100644 --- a/net/ipv4/icmp.c +++ b/net/ipv4/icmp.c @@ -1,7 +1,7 @@ /* * NET3: Implementation of the ICMP protocol layer. * - * Alan Cox, <alan@redhat.com> + * Alan Cox, <alan@lxorguk.ukuu.org.uk> * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public License diff --git a/net/ipv4/igmp.c b/net/ipv4/igmp.c index f70fac612596..a0d86455c53e 100644 --- a/net/ipv4/igmp.c +++ b/net/ipv4/igmp.c @@ -9,7 +9,7 @@ * seems to fall out with gcc 2.6.2. * * Authors: - * Alan Cox <Alan.Cox@linux.org> + * Alan Cox <alan@lxorguk.ukuu.org.uk> * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public License @@ -1234,6 +1234,7 @@ void ip_mc_inc_group(struct in_device *in_dev, __be32 addr) write_lock_bh(&in_dev->mc_list_lock); im->next=in_dev->mc_list; in_dev->mc_list=im; + in_dev->mc_count++; write_unlock_bh(&in_dev->mc_list_lock); #ifdef CONFIG_IP_MULTICAST igmpv3_del_delrec(in_dev, im->multiaddr); @@ -1282,6 +1283,7 @@ void ip_mc_dec_group(struct in_device *in_dev, __be32 addr) if (--i->users == 0) { write_lock_bh(&in_dev->mc_list_lock); *ip = i->next; + in_dev->mc_count--; write_unlock_bh(&in_dev->mc_list_lock); igmp_group_dropped(i); @@ -1330,6 +1332,7 @@ void ip_mc_init_dev(struct in_device *in_dev) setup_timer(&in_dev->mr_gq_timer, igmp_gq_timer_expire, (unsigned long)in_dev); in_dev->mr_ifc_count = 0; + in_dev->mc_count = 0; setup_timer(&in_dev->mr_ifc_timer, igmp_ifc_timer_expire, (unsigned long)in_dev); in_dev->mr_qrv = IGMP_Unsolicited_Report_Count; @@ -1369,8 +1372,8 @@ void ip_mc_destroy_dev(struct in_device *in_dev) write_lock_bh(&in_dev->mc_list_lock); while ((i = in_dev->mc_list) != NULL) { in_dev->mc_list = i->next; + in_dev->mc_count--; write_unlock_bh(&in_dev->mc_list_lock); - igmp_group_dropped(i); ip_ma_put(i); @@ -2383,7 +2386,7 @@ static int igmp_mc_seq_show(struct seq_file *seq, void *v) if (state->in_dev->mc_list == im) { seq_printf(seq, "%d\t%-10s: %5d %7s\n", - state->dev->ifindex, state->dev->name, state->dev->mc_count, querier); + state->dev->ifindex, state->dev->name, state->in_dev->mc_count, querier); } seq_printf(seq, diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c index 0c1ae68ee84b..bd1278a2d828 100644 --- a/net/ipv4/inet_connection_sock.c +++ b/net/ipv4/inet_connection_sock.c @@ -30,20 +30,22 @@ EXPORT_SYMBOL(inet_csk_timer_bug_msg); #endif /* - * This array holds the first and last local port number. + * This struct holds the first and last local port number. */ -int sysctl_local_port_range[2] = { 32768, 61000 }; -DEFINE_SEQLOCK(sysctl_port_range_lock); +struct local_ports sysctl_local_ports __read_mostly = { + .lock = SEQLOCK_UNLOCKED, + .range = { 32768, 61000 }, +}; void inet_get_local_port_range(int *low, int *high) { unsigned seq; do { - seq = read_seqbegin(&sysctl_port_range_lock); + seq = read_seqbegin(&sysctl_local_ports.lock); - *low = sysctl_local_port_range[0]; - *high = sysctl_local_port_range[1]; - } while (read_seqretry(&sysctl_port_range_lock, seq)); + *low = sysctl_local_ports.range[0]; + *high = sysctl_local_ports.range[1]; + } while (read_seqretry(&sysctl_local_ports.lock, seq)); } EXPORT_SYMBOL(inet_get_local_port_range); @@ -335,6 +337,7 @@ struct dst_entry* inet_csk_route_req(struct sock *sk, .saddr = ireq->loc_addr, .tos = RT_CONN_FLAGS(sk) } }, .proto = sk->sk_protocol, + .flags = inet_sk_flowi_flags(sk), .uli_u = { .ports = { .sport = inet_sk(sk)->sport, .dport = ireq->rmt_port } } }; @@ -515,6 +518,8 @@ struct sock *inet_csk_clone(struct sock *sk, const struct request_sock *req, newicsk->icsk_bind_hash = NULL; inet_sk(newsk)->dport = inet_rsk(req)->rmt_port; + inet_sk(newsk)->num = ntohs(inet_rsk(req)->loc_port); + inet_sk(newsk)->sport = inet_rsk(req)->loc_port; newsk->sk_write_space = sk_stream_write_space; newicsk->icsk_retransmits = 0; diff --git a/net/ipv4/inet_diag.c b/net/ipv4/inet_diag.c index c10036e7a463..89cb047ab314 100644 --- a/net/ipv4/inet_diag.c +++ b/net/ipv4/inet_diag.c @@ -782,11 +782,15 @@ skip_listen_ht: struct sock *sk; struct hlist_node *node; + num = 0; + + if (hlist_empty(&head->chain) && hlist_empty(&head->twchain)) + continue; + if (i > s_i) s_num = 0; read_lock_bh(lock); - num = 0; sk_for_each(sk, node, &head->chain) { struct inet_sock *inet = inet_sk(sk); diff --git a/net/ipv4/inet_timewait_sock.c b/net/ipv4/inet_timewait_sock.c index d985bd613d25..1c5fd38f8824 100644 --- a/net/ipv4/inet_timewait_sock.c +++ b/net/ipv4/inet_timewait_sock.c @@ -126,6 +126,7 @@ struct inet_timewait_sock *inet_twsk_alloc(const struct sock *sk, const int stat tw->tw_reuse = sk->sk_reuse; tw->tw_hash = sk->sk_hash; tw->tw_ipv6only = 0; + tw->tw_transparent = inet->transparent; tw->tw_prot = sk->sk_prot_creator; twsk_net_set(tw, hold_net(sock_net(sk))); atomic_set(&tw->tw_refcnt, 1); @@ -409,3 +410,38 @@ out: } EXPORT_SYMBOL_GPL(inet_twdr_twcal_tick); + +void inet_twsk_purge(struct net *net, struct inet_hashinfo *hashinfo, + struct inet_timewait_death_row *twdr, int family) +{ + struct inet_timewait_sock *tw; + struct sock *sk; + struct hlist_node *node; + int h; + + local_bh_disable(); + for (h = 0; h < (hashinfo->ehash_size); h++) { + struct inet_ehash_bucket *head = + inet_ehash_bucket(hashinfo, h); + rwlock_t *lock = inet_ehash_lockp(hashinfo, h); +restart: + write_lock(lock); + sk_for_each(sk, node, &head->twchain) { + + tw = inet_twsk(sk); + if (!net_eq(twsk_net(tw), net) || + tw->tw_family != family) + continue; + + atomic_inc(&tw->tw_refcnt); + write_unlock(lock); + inet_twsk_deschedule(tw, twdr); + inet_twsk_put(tw); + + goto restart; + } + write_unlock(lock); + } + local_bh_enable(); +} +EXPORT_SYMBOL_GPL(inet_twsk_purge); diff --git a/net/ipv4/ip_fragment.c b/net/ipv4/ip_fragment.c index 2152d222b954..e4f81f54befe 100644 --- a/net/ipv4/ip_fragment.c +++ b/net/ipv4/ip_fragment.c @@ -6,7 +6,7 @@ * The IP fragmentation functionality. * * Authors: Fred N. van Kempen <waltje@uWalt.NL.Mugnet.ORG> - * Alan Cox <Alan.Cox@linux.org> + * Alan Cox <alan@lxorguk.ukuu.org.uk> * * Fixes: * Alan Cox : Split from ip.c , see ip_input.c for history. diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c index 2a61158ea722..85c487b8572b 100644 --- a/net/ipv4/ip_gre.c +++ b/net/ipv4/ip_gre.c @@ -27,6 +27,7 @@ #include <linux/inetdevice.h> #include <linux/igmp.h> #include <linux/netfilter_ipv4.h> +#include <linux/etherdevice.h> #include <linux/if_ether.h> #include <net/sock.h> @@ -41,6 +42,7 @@ #include <net/xfrm.h> #include <net/net_namespace.h> #include <net/netns/generic.h> +#include <net/rtnetlink.h> #ifdef CONFIG_IPV6 #include <net/ipv6.h> @@ -117,8 +119,10 @@ Alexey Kuznetsov. */ +static struct rtnl_link_ops ipgre_link_ops __read_mostly; static int ipgre_tunnel_init(struct net_device *dev); static void ipgre_tunnel_setup(struct net_device *dev); +static int ipgre_tunnel_bind_dev(struct net_device *dev); /* Fallback tunnel: no source, no destination, no key, no options */ @@ -163,38 +167,64 @@ static DEFINE_RWLOCK(ipgre_lock); /* Given src, dst and key, find appropriate for input tunnel. */ static struct ip_tunnel * ipgre_tunnel_lookup(struct net *net, - __be32 remote, __be32 local, __be32 key) + __be32 remote, __be32 local, + __be32 key, __be16 gre_proto) { unsigned h0 = HASH(remote); unsigned h1 = HASH(key); struct ip_tunnel *t; + struct ip_tunnel *t2 = NULL; struct ipgre_net *ign = net_generic(net, ipgre_net_id); + int dev_type = (gre_proto == htons(ETH_P_TEB)) ? + ARPHRD_ETHER : ARPHRD_IPGRE; for (t = ign->tunnels_r_l[h0^h1]; t; t = t->next) { if (local == t->parms.iph.saddr && remote == t->parms.iph.daddr) { - if (t->parms.i_key == key && (t->dev->flags&IFF_UP)) - return t; + if (t->parms.i_key == key && t->dev->flags & IFF_UP) { + if (t->dev->type == dev_type) + return t; + if (t->dev->type == ARPHRD_IPGRE && !t2) + t2 = t; + } } } + for (t = ign->tunnels_r[h0^h1]; t; t = t->next) { if (remote == t->parms.iph.daddr) { - if (t->parms.i_key == key && (t->dev->flags&IFF_UP)) - return t; + if (t->parms.i_key == key && t->dev->flags & IFF_UP) { + if (t->dev->type == dev_type) + return t; + if (t->dev->type == ARPHRD_IPGRE && !t2) + t2 = t; + } } } + for (t = ign->tunnels_l[h1]; t; t = t->next) { if (local == t->parms.iph.saddr || (local == t->parms.iph.daddr && ipv4_is_multicast(local))) { - if (t->parms.i_key == key && (t->dev->flags&IFF_UP)) - return t; + if (t->parms.i_key == key && t->dev->flags & IFF_UP) { + if (t->dev->type == dev_type) + return t; + if (t->dev->type == ARPHRD_IPGRE && !t2) + t2 = t; + } } } + for (t = ign->tunnels_wc[h1]; t; t = t->next) { - if (t->parms.i_key == key && (t->dev->flags&IFF_UP)) - return t; + if (t->parms.i_key == key && t->dev->flags & IFF_UP) { + if (t->dev->type == dev_type) + return t; + if (t->dev->type == ARPHRD_IPGRE && !t2) + t2 = t; + } } + if (t2) + return t2; + if (ign->fb_tunnel_dev->flags&IFF_UP) return netdev_priv(ign->fb_tunnel_dev); return NULL; @@ -249,25 +279,37 @@ static void ipgre_tunnel_unlink(struct ipgre_net *ign, struct ip_tunnel *t) } } -static struct ip_tunnel * ipgre_tunnel_locate(struct net *net, - struct ip_tunnel_parm *parms, int create) +static struct ip_tunnel *ipgre_tunnel_find(struct net *net, + struct ip_tunnel_parm *parms, + int type) { __be32 remote = parms->iph.daddr; __be32 local = parms->iph.saddr; __be32 key = parms->i_key; - struct ip_tunnel *t, **tp, *nt; + struct ip_tunnel *t, **tp; + struct ipgre_net *ign = net_generic(net, ipgre_net_id); + + for (tp = __ipgre_bucket(ign, parms); (t = *tp) != NULL; tp = &t->next) + if (local == t->parms.iph.saddr && + remote == t->parms.iph.daddr && + key == t->parms.i_key && + type == t->dev->type) + break; + + return t; +} + +static struct ip_tunnel * ipgre_tunnel_locate(struct net *net, + struct ip_tunnel_parm *parms, int create) +{ + struct ip_tunnel *t, *nt; struct net_device *dev; char name[IFNAMSIZ]; struct ipgre_net *ign = net_generic(net, ipgre_net_id); - for (tp = __ipgre_bucket(ign, parms); (t = *tp) != NULL; tp = &t->next) { - if (local == t->parms.iph.saddr && remote == t->parms.iph.daddr) { - if (key == t->parms.i_key) - return t; - } - } - if (!create) - return NULL; + t = ipgre_tunnel_find(net, parms, ARPHRD_IPGRE); + if (t || !create) + return t; if (parms->name[0]) strlcpy(name, parms->name, IFNAMSIZ); @@ -285,9 +327,11 @@ static struct ip_tunnel * ipgre_tunnel_locate(struct net *net, goto failed_free; } - dev->init = ipgre_tunnel_init; nt = netdev_priv(dev); nt->parms = *parms; + dev->rtnl_link_ops = &ipgre_link_ops; + + dev->mtu = ipgre_tunnel_bind_dev(dev); if (register_netdevice(dev) < 0) goto failed_free; @@ -380,8 +424,9 @@ static void ipgre_err(struct sk_buff *skb, u32 info) read_lock(&ipgre_lock); t = ipgre_tunnel_lookup(dev_net(skb->dev), iph->daddr, iph->saddr, - (flags&GRE_KEY) ? - *(((__be32*)p) + (grehlen>>2) - 1) : 0); + flags & GRE_KEY ? + *(((__be32 *)p) + (grehlen / 4) - 1) : 0, + p[1]); if (t == NULL || t->parms.iph.daddr == 0 || ipv4_is_multicast(t->parms.iph.daddr)) goto out; @@ -431,6 +476,8 @@ static int ipgre_rcv(struct sk_buff *skb) u32 seqno = 0; struct ip_tunnel *tunnel; int offset = 4; + __be16 gre_proto; + unsigned int len; if (!pskb_may_pull(skb, 16)) goto drop_nolock; @@ -470,20 +517,22 @@ static int ipgre_rcv(struct sk_buff *skb) } } + gre_proto = *(__be16 *)(h + 2); + read_lock(&ipgre_lock); if ((tunnel = ipgre_tunnel_lookup(dev_net(skb->dev), - iph->saddr, iph->daddr, key)) != NULL) { + iph->saddr, iph->daddr, key, + gre_proto))) { struct net_device_stats *stats = &tunnel->dev->stats; secpath_reset(skb); - skb->protocol = *(__be16*)(h + 2); + skb->protocol = gre_proto; /* WCCP version 1 and 2 protocol decoding. * - Change protocol to IP * - When dealing with WCCPv2, Skip extra 4 bytes in GRE header */ - if (flags == 0 && - skb->protocol == htons(ETH_P_WCCP)) { + if (flags == 0 && gre_proto == htons(ETH_P_WCCP)) { skb->protocol = htons(ETH_P_IP); if ((*(h + offset) & 0xF0) != 0x40) offset += 4; @@ -491,7 +540,6 @@ static int ipgre_rcv(struct sk_buff *skb) skb->mac_header = skb->network_header; __pskb_pull(skb, offset); - skb_reset_network_header(skb); skb_postpull_rcsum(skb, skb_transport_header(skb), offset); skb->pkt_type = PACKET_HOST; #ifdef CONFIG_NET_IPGRE_BROADCAST @@ -519,13 +567,32 @@ static int ipgre_rcv(struct sk_buff *skb) } tunnel->i_seqno = seqno + 1; } + + len = skb->len; + + /* Warning: All skb pointers will be invalidated! */ + if (tunnel->dev->type == ARPHRD_ETHER) { + if (!pskb_may_pull(skb, ETH_HLEN)) { + stats->rx_length_errors++; + stats->rx_errors++; + goto drop; + } + + iph = ip_hdr(skb); + skb->protocol = eth_type_trans(skb, tunnel->dev); + skb_postpull_rcsum(skb, eth_hdr(skb), ETH_HLEN); + } + stats->rx_packets++; - stats->rx_bytes += skb->len; + stats->rx_bytes += len; skb->dev = tunnel->dev; dst_release(skb->dst); skb->dst = NULL; nf_reset(skb); + + skb_reset_network_header(skb); ipgre_ecn_decapsulate(iph, skb); + netif_rx(skb); read_unlock(&ipgre_lock); return(0); @@ -560,7 +627,10 @@ static int ipgre_tunnel_xmit(struct sk_buff *skb, struct net_device *dev) goto tx_error; } - if (dev->header_ops) { + if (dev->type == ARPHRD_ETHER) + IPCB(skb)->flags = 0; + + if (dev->header_ops && dev->type == ARPHRD_IPGRE) { gre_hlen = 0; tiph = (struct iphdr*)skb->data; } else { @@ -637,7 +707,7 @@ static int ipgre_tunnel_xmit(struct sk_buff *skb, struct net_device *dev) df = tiph->frag_off; if (df) - mtu = dst_mtu(&rt->u.dst) - tunnel->hlen; + mtu = dst_mtu(&rt->u.dst) - dev->hard_header_len - tunnel->hlen; else mtu = skb->dst ? dst_mtu(skb->dst) : dev->mtu; @@ -703,7 +773,7 @@ static int ipgre_tunnel_xmit(struct sk_buff *skb, struct net_device *dev) old_iph = ip_hdr(skb); } - skb->transport_header = skb->network_header; + skb_reset_transport_header(skb); skb_push(skb, gre_hlen); skb_reset_network_header(skb); memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt)); @@ -736,8 +806,9 @@ static int ipgre_tunnel_xmit(struct sk_buff *skb, struct net_device *dev) iph->ttl = dst_metric(&rt->u.dst, RTAX_HOPLIMIT); } - ((__be16*)(iph+1))[0] = tunnel->parms.o_flags; - ((__be16*)(iph+1))[1] = skb->protocol; + ((__be16 *)(iph + 1))[0] = tunnel->parms.o_flags; + ((__be16 *)(iph + 1))[1] = (dev->type == ARPHRD_ETHER) ? + htons(ETH_P_TEB) : skb->protocol; if (tunnel->parms.o_flags&(GRE_KEY|GRE_CSUM|GRE_SEQ)) { __be32 *ptr = (__be32*)(((u8*)iph) + tunnel->hlen - 4); @@ -773,7 +844,7 @@ tx_error: return 0; } -static void ipgre_tunnel_bind_dev(struct net_device *dev) +static int ipgre_tunnel_bind_dev(struct net_device *dev) { struct net_device *tdev = NULL; struct ip_tunnel *tunnel; @@ -785,7 +856,7 @@ static void ipgre_tunnel_bind_dev(struct net_device *dev) tunnel = netdev_priv(dev); iph = &tunnel->parms.iph; - /* Guess output device to choose reasonable mtu and hard_header_len */ + /* Guess output device to choose reasonable mtu and needed_headroom */ if (iph->daddr) { struct flowi fl = { .oif = tunnel->parms.link, @@ -799,14 +870,16 @@ static void ipgre_tunnel_bind_dev(struct net_device *dev) tdev = rt->u.dst.dev; ip_rt_put(rt); } - dev->flags |= IFF_POINTOPOINT; + + if (dev->type != ARPHRD_ETHER) + dev->flags |= IFF_POINTOPOINT; } if (!tdev && tunnel->parms.link) tdev = __dev_get_by_index(dev_net(dev), tunnel->parms.link); if (tdev) { - hlen = tdev->hard_header_len; + hlen = tdev->hard_header_len + tdev->needed_headroom; mtu = tdev->mtu; } dev->iflink = tunnel->parms.link; @@ -820,10 +893,15 @@ static void ipgre_tunnel_bind_dev(struct net_device *dev) if (tunnel->parms.o_flags&GRE_SEQ) addend += 4; } - dev->hard_header_len = hlen + addend; - dev->mtu = mtu - addend; + dev->needed_headroom = addend + hlen; + mtu -= dev->hard_header_len - addend; + + if (mtu < 68) + mtu = 68; + tunnel->hlen = addend; + return mtu; } static int @@ -917,7 +995,7 @@ ipgre_tunnel_ioctl (struct net_device *dev, struct ifreq *ifr, int cmd) t->parms.iph.frag_off = p.iph.frag_off; if (t->parms.link != p.link) { t->parms.link = p.link; - ipgre_tunnel_bind_dev(dev); + dev->mtu = ipgre_tunnel_bind_dev(dev); netdev_state_change(dev); } } @@ -959,7 +1037,8 @@ done: static int ipgre_tunnel_change_mtu(struct net_device *dev, int new_mtu) { struct ip_tunnel *tunnel = netdev_priv(dev); - if (new_mtu < 68 || new_mtu > 0xFFF8 - tunnel->hlen) + if (new_mtu < 68 || + new_mtu > 0xFFF8 - dev->hard_header_len - tunnel->hlen) return -EINVAL; dev->mtu = new_mtu; return 0; @@ -1078,6 +1157,7 @@ static int ipgre_close(struct net_device *dev) static void ipgre_tunnel_setup(struct net_device *dev) { + dev->init = ipgre_tunnel_init; dev->uninit = ipgre_tunnel_uninit; dev->destructor = free_netdev; dev->hard_start_xmit = ipgre_tunnel_xmit; @@ -1085,7 +1165,7 @@ static void ipgre_tunnel_setup(struct net_device *dev) dev->change_mtu = ipgre_tunnel_change_mtu; dev->type = ARPHRD_IPGRE; - dev->hard_header_len = LL_MAX_HEADER + sizeof(struct iphdr) + 4; + dev->needed_headroom = LL_MAX_HEADER + sizeof(struct iphdr) + 4; dev->mtu = ETH_DATA_LEN - sizeof(struct iphdr) - 4; dev->flags = IFF_NOARP; dev->iflink = 0; @@ -1107,8 +1187,6 @@ static int ipgre_tunnel_init(struct net_device *dev) memcpy(dev->dev_addr, &tunnel->parms.iph.saddr, 4); memcpy(dev->broadcast, &tunnel->parms.iph.daddr, 4); - ipgre_tunnel_bind_dev(dev); - if (iph->daddr) { #ifdef CONFIG_NET_IPGRE_BROADCAST if (ipv4_is_multicast(iph->daddr)) { @@ -1189,6 +1267,7 @@ static int ipgre_init_net(struct net *net) ign->fb_tunnel_dev->init = ipgre_fb_tunnel_init; dev_net_set(ign->fb_tunnel_dev, net); + ign->fb_tunnel_dev->rtnl_link_ops = &ipgre_link_ops; if ((err = register_netdev(ign->fb_tunnel_dev))) goto err_reg_dev; @@ -1221,6 +1300,298 @@ static struct pernet_operations ipgre_net_ops = { .exit = ipgre_exit_net, }; +static int ipgre_tunnel_validate(struct nlattr *tb[], struct nlattr *data[]) +{ + __be16 flags; + + if (!data) + return 0; + + flags = 0; + if (data[IFLA_GRE_IFLAGS]) + flags |= nla_get_be16(data[IFLA_GRE_IFLAGS]); + if (data[IFLA_GRE_OFLAGS]) + flags |= nla_get_be16(data[IFLA_GRE_OFLAGS]); + if (flags & (GRE_VERSION|GRE_ROUTING)) + return -EINVAL; + + return 0; +} + +static int ipgre_tap_validate(struct nlattr *tb[], struct nlattr *data[]) +{ + __be32 daddr; + + if (tb[IFLA_ADDRESS]) { + if (nla_len(tb[IFLA_ADDRESS]) != ETH_ALEN) + return -EINVAL; + if (!is_valid_ether_addr(nla_data(tb[IFLA_ADDRESS]))) + return -EADDRNOTAVAIL; + } + + if (!data) + goto out; + + if (data[IFLA_GRE_REMOTE]) { + memcpy(&daddr, nla_data(data[IFLA_GRE_REMOTE]), 4); + if (!daddr) + return -EINVAL; + } + +out: + return ipgre_tunnel_validate(tb, data); +} + +static void ipgre_netlink_parms(struct nlattr *data[], + struct ip_tunnel_parm *parms) +{ + memset(parms, 0, sizeof(*parms)); + + parms->iph.protocol = IPPROTO_GRE; + + if (!data) + return; + + if (data[IFLA_GRE_LINK]) + parms->link = nla_get_u32(data[IFLA_GRE_LINK]); + + if (data[IFLA_GRE_IFLAGS]) + parms->i_flags = nla_get_be16(data[IFLA_GRE_IFLAGS]); + + if (data[IFLA_GRE_OFLAGS]) + parms->o_flags = nla_get_be16(data[IFLA_GRE_OFLAGS]); + + if (data[IFLA_GRE_IKEY]) + parms->i_key = nla_get_be32(data[IFLA_GRE_IKEY]); + + if (data[IFLA_GRE_OKEY]) + parms->o_key = nla_get_be32(data[IFLA_GRE_OKEY]); + + if (data[IFLA_GRE_LOCAL]) + parms->iph.saddr = nla_get_be32(data[IFLA_GRE_LOCAL]); + + if (data[IFLA_GRE_REMOTE]) + parms->iph.daddr = nla_get_be32(data[IFLA_GRE_REMOTE]); + + if (data[IFLA_GRE_TTL]) + parms->iph.ttl = nla_get_u8(data[IFLA_GRE_TTL]); + + if (data[IFLA_GRE_TOS]) + parms->iph.tos = nla_get_u8(data[IFLA_GRE_TOS]); + + if (!data[IFLA_GRE_PMTUDISC] || nla_get_u8(data[IFLA_GRE_PMTUDISC])) + parms->iph.frag_off = htons(IP_DF); +} + +static int ipgre_tap_init(struct net_device *dev) +{ + struct ip_tunnel *tunnel; + + tunnel = netdev_priv(dev); + + tunnel->dev = dev; + strcpy(tunnel->parms.name, dev->name); + + ipgre_tunnel_bind_dev(dev); + + return 0; +} + +static void ipgre_tap_setup(struct net_device *dev) +{ + + ether_setup(dev); + + dev->init = ipgre_tap_init; + dev->uninit = ipgre_tunnel_uninit; + dev->destructor = free_netdev; + dev->hard_start_xmit = ipgre_tunnel_xmit; + dev->change_mtu = ipgre_tunnel_change_mtu; + + dev->iflink = 0; + dev->features |= NETIF_F_NETNS_LOCAL; +} + +static int ipgre_newlink(struct net_device *dev, struct nlattr *tb[], + struct nlattr *data[]) +{ + struct ip_tunnel *nt; + struct net *net = dev_net(dev); + struct ipgre_net *ign = net_generic(net, ipgre_net_id); + int mtu; + int err; + + nt = netdev_priv(dev); + ipgre_netlink_parms(data, &nt->parms); + + if (ipgre_tunnel_find(net, &nt->parms, dev->type)) + return -EEXIST; + + if (dev->type == ARPHRD_ETHER && !tb[IFLA_ADDRESS]) + random_ether_addr(dev->dev_addr); + + mtu = ipgre_tunnel_bind_dev(dev); + if (!tb[IFLA_MTU]) + dev->mtu = mtu; + + err = register_netdevice(dev); + if (err) + goto out; + + dev_hold(dev); + ipgre_tunnel_link(ign, nt); + +out: + return err; +} + +static int ipgre_changelink(struct net_device *dev, struct nlattr *tb[], + struct nlattr *data[]) +{ + struct ip_tunnel *t, *nt; + struct net *net = dev_net(dev); + struct ipgre_net *ign = net_generic(net, ipgre_net_id); + struct ip_tunnel_parm p; + int mtu; + + if (dev == ign->fb_tunnel_dev) + return -EINVAL; + + nt = netdev_priv(dev); + ipgre_netlink_parms(data, &p); + + t = ipgre_tunnel_locate(net, &p, 0); + + if (t) { + if (t->dev != dev) + return -EEXIST; + } else { + unsigned nflags = 0; + + t = nt; + + if (ipv4_is_multicast(p.iph.daddr)) + nflags = IFF_BROADCAST; + else if (p.iph.daddr) + nflags = IFF_POINTOPOINT; + + if ((dev->flags ^ nflags) & + (IFF_POINTOPOINT | IFF_BROADCAST)) + return -EINVAL; + + ipgre_tunnel_unlink(ign, t); + t->parms.iph.saddr = p.iph.saddr; + t->parms.iph.daddr = p.iph.daddr; + t->parms.i_key = p.i_key; + memcpy(dev->dev_addr, &p.iph.saddr, 4); + memcpy(dev->broadcast, &p.iph.daddr, 4); + ipgre_tunnel_link(ign, t); + netdev_state_change(dev); + } + + t->parms.o_key = p.o_key; + t->parms.iph.ttl = p.iph.ttl; + t->parms.iph.tos = p.iph.tos; + t->parms.iph.frag_off = p.iph.frag_off; + + if (t->parms.link != p.link) { + t->parms.link = p.link; + mtu = ipgre_tunnel_bind_dev(dev); + if (!tb[IFLA_MTU]) + dev->mtu = mtu; + netdev_state_change(dev); + } + + return 0; +} + +static size_t ipgre_get_size(const struct net_device *dev) +{ + return + /* IFLA_GRE_LINK */ + nla_total_size(4) + + /* IFLA_GRE_IFLAGS */ + nla_total_size(2) + + /* IFLA_GRE_OFLAGS */ + nla_total_size(2) + + /* IFLA_GRE_IKEY */ + nla_total_size(4) + + /* IFLA_GRE_OKEY */ + nla_total_size(4) + + /* IFLA_GRE_LOCAL */ + nla_total_size(4) + + /* IFLA_GRE_REMOTE */ + nla_total_size(4) + + /* IFLA_GRE_TTL */ + nla_total_size(1) + + /* IFLA_GRE_TOS */ + nla_total_size(1) + + /* IFLA_GRE_PMTUDISC */ + nla_total_size(1) + + 0; +} + +static int ipgre_fill_info(struct sk_buff *skb, const struct net_device *dev) +{ + struct ip_tunnel *t = netdev_priv(dev); + struct ip_tunnel_parm *p = &t->parms; + + NLA_PUT_U32(skb, IFLA_GRE_LINK, p->link); + NLA_PUT_BE16(skb, IFLA_GRE_IFLAGS, p->i_flags); + NLA_PUT_BE16(skb, IFLA_GRE_OFLAGS, p->o_flags); + NLA_PUT_BE32(skb, IFLA_GRE_IKEY, p->i_key); + NLA_PUT_BE32(skb, IFLA_GRE_OKEY, p->o_key); + NLA_PUT_BE32(skb, IFLA_GRE_LOCAL, p->iph.saddr); + NLA_PUT_BE32(skb, IFLA_GRE_REMOTE, p->iph.daddr); + NLA_PUT_U8(skb, IFLA_GRE_TTL, p->iph.ttl); + NLA_PUT_U8(skb, IFLA_GRE_TOS, p->iph.tos); + NLA_PUT_U8(skb, IFLA_GRE_PMTUDISC, !!(p->iph.frag_off & htons(IP_DF))); + + return 0; + +nla_put_failure: + return -EMSGSIZE; +} + +static const struct nla_policy ipgre_policy[IFLA_GRE_MAX + 1] = { + [IFLA_GRE_LINK] = { .type = NLA_U32 }, + [IFLA_GRE_IFLAGS] = { .type = NLA_U16 }, + [IFLA_GRE_OFLAGS] = { .type = NLA_U16 }, + [IFLA_GRE_IKEY] = { .type = NLA_U32 }, + [IFLA_GRE_OKEY] = { .type = NLA_U32 }, + [IFLA_GRE_LOCAL] = { .len = FIELD_SIZEOF(struct iphdr, saddr) }, + [IFLA_GRE_REMOTE] = { .len = FIELD_SIZEOF(struct iphdr, daddr) }, + [IFLA_GRE_TTL] = { .type = NLA_U8 }, + [IFLA_GRE_TOS] = { .type = NLA_U8 }, + [IFLA_GRE_PMTUDISC] = { .type = NLA_U8 }, +}; + +static struct rtnl_link_ops ipgre_link_ops __read_mostly = { + .kind = "gre", + .maxtype = IFLA_GRE_MAX, + .policy = ipgre_policy, + .priv_size = sizeof(struct ip_tunnel), + .setup = ipgre_tunnel_setup, + .validate = ipgre_tunnel_validate, + .newlink = ipgre_newlink, + .changelink = ipgre_changelink, + .get_size = ipgre_get_size, + .fill_info = ipgre_fill_info, +}; + +static struct rtnl_link_ops ipgre_tap_ops __read_mostly = { + .kind = "gretap", + .maxtype = IFLA_GRE_MAX, + .policy = ipgre_policy, + .priv_size = sizeof(struct ip_tunnel), + .setup = ipgre_tap_setup, + .validate = ipgre_tap_validate, + .newlink = ipgre_newlink, + .changelink = ipgre_changelink, + .get_size = ipgre_get_size, + .fill_info = ipgre_fill_info, +}; + /* * And now the modules code and kernel interface. */ @@ -1238,19 +1609,39 @@ static int __init ipgre_init(void) err = register_pernet_gen_device(&ipgre_net_id, &ipgre_net_ops); if (err < 0) - inet_del_protocol(&ipgre_protocol, IPPROTO_GRE); + goto gen_device_failed; + err = rtnl_link_register(&ipgre_link_ops); + if (err < 0) + goto rtnl_link_failed; + + err = rtnl_link_register(&ipgre_tap_ops); + if (err < 0) + goto tap_ops_failed; + +out: return err; + +tap_ops_failed: + rtnl_link_unregister(&ipgre_link_ops); +rtnl_link_failed: + unregister_pernet_gen_device(ipgre_net_id, &ipgre_net_ops); +gen_device_failed: + inet_del_protocol(&ipgre_protocol, IPPROTO_GRE); + goto out; } static void __exit ipgre_fini(void) { + rtnl_link_unregister(&ipgre_tap_ops); + rtnl_link_unregister(&ipgre_link_ops); + unregister_pernet_gen_device(ipgre_net_id, &ipgre_net_ops); if (inet_del_protocol(&ipgre_protocol, IPPROTO_GRE) < 0) printk(KERN_INFO "ipgre close: can't remove protocol\n"); - - unregister_pernet_gen_device(ipgre_net_id, &ipgre_net_ops); } module_init(ipgre_init); module_exit(ipgre_fini); MODULE_LICENSE("GPL"); +MODULE_ALIAS_RTNL_LINK("gre"); +MODULE_ALIAS_RTNL_LINK("gretap"); diff --git a/net/ipv4/ip_input.c b/net/ipv4/ip_input.c index e0bed56c51f1..861978a4f1a8 100644 --- a/net/ipv4/ip_input.c +++ b/net/ipv4/ip_input.c @@ -8,7 +8,7 @@ * Authors: Ross Biro * Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG> * Donald Becker, <becker@super.org> - * Alan Cox, <Alan.Cox@linux.org> + * Alan Cox, <alan@lxorguk.ukuu.org.uk> * Richard Underwood * Stefan Becker, <stefanb@yello.ping.de> * Jorge Cwik, <jorge@laser.satlink.net> diff --git a/net/ipv4/ip_options.c b/net/ipv4/ip_options.c index be3f18a7a40e..2c88da6e7862 100644 --- a/net/ipv4/ip_options.c +++ b/net/ipv4/ip_options.c @@ -438,7 +438,7 @@ int ip_options_compile(struct net *net, goto error; } opt->cipso = optptr - iph; - if (cipso_v4_validate(&optptr)) { + if (cipso_v4_validate(skb, &optptr)) { pp_ptr = optptr; goto error; } diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c index d533a89e08de..d2a8f8bb78a6 100644 --- a/net/ipv4/ip_output.c +++ b/net/ipv4/ip_output.c @@ -340,6 +340,7 @@ int ip_queue_xmit(struct sk_buff *skb, int ipfragok) .saddr = inet->saddr, .tos = RT_CONN_FLAGS(sk) } }, .proto = sk->sk_protocol, + .flags = inet_sk_flowi_flags(sk), .uli_u = { .ports = { .sport = inet->sport, .dport = inet->dport } } }; @@ -1371,7 +1372,8 @@ void ip_send_reply(struct sock *sk, struct sk_buff *skb, struct ip_reply_arg *ar .uli_u = { .ports = { .sport = tcp_hdr(skb)->dest, .dport = tcp_hdr(skb)->source } }, - .proto = sk->sk_protocol }; + .proto = sk->sk_protocol, + .flags = ip_reply_arg_flowi_flags(arg) }; security_skb_classify_flow(skb, &fl); if (ip_route_output_key(sock_net(sk), &rt, &fl)) return; diff --git a/net/ipv4/ip_sockglue.c b/net/ipv4/ip_sockglue.c index 105d92a039b9..465abf0a9869 100644 --- a/net/ipv4/ip_sockglue.c +++ b/net/ipv4/ip_sockglue.c @@ -419,7 +419,7 @@ static int do_ip_setsockopt(struct sock *sk, int level, (1<<IP_TTL) | (1<<IP_HDRINCL) | (1<<IP_MTU_DISCOVER) | (1<<IP_RECVERR) | (1<<IP_ROUTER_ALERT) | (1<<IP_FREEBIND) | - (1<<IP_PASSSEC))) || + (1<<IP_PASSSEC) | (1<<IP_TRANSPARENT))) || optname == IP_MULTICAST_TTL || optname == IP_MULTICAST_LOOP) { if (optlen >= sizeof(int)) { @@ -878,6 +878,16 @@ static int do_ip_setsockopt(struct sock *sk, int level, err = xfrm_user_policy(sk, optname, optval, optlen); break; + case IP_TRANSPARENT: + if (!capable(CAP_NET_ADMIN)) { + err = -EPERM; + break; + } + if (optlen < 1) + goto e_inval; + inet->transparent = !!val; + break; + default: err = -ENOPROTOOPT; break; @@ -1130,6 +1140,9 @@ static int do_ip_getsockopt(struct sock *sk, int level, int optname, case IP_FREEBIND: val = inet->freebind; break; + case IP_TRANSPARENT: + val = inet->transparent; + break; default: release_sock(sk); return -ENOPROTOOPT; diff --git a/net/ipv4/ipip.c b/net/ipv4/ipip.c index 4c6d2caf9203..29609d29df76 100644 --- a/net/ipv4/ipip.c +++ b/net/ipv4/ipip.c @@ -41,7 +41,7 @@ Made the tunnels use dev->name not tunnel: when error reporting. Added tx_dropped stat - -Alan Cox (Alan.Cox@linux.org) 21 March 95 + -Alan Cox (alan@lxorguk.ukuu.org.uk) 21 March 95 Reworked: Changed to tunnel to destination gateway in addition to the diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c index c519b8d30eee..b42e082cc170 100644 --- a/net/ipv4/ipmr.c +++ b/net/ipv4/ipmr.c @@ -1,7 +1,7 @@ /* * IP multicast routing support for mrouted 3.6/3.8 * - * (c) 1995 Alan Cox, <alan@redhat.com> + * (c) 1995 Alan Cox, <alan@lxorguk.ukuu.org.uk> * Linux Consultancy and Custom Driver Development * * This program is free software; you can redistribute it and/or diff --git a/net/ipv4/ipvs/ip_vs_proto_ah.c b/net/ipv4/ipvs/ip_vs_proto_ah.c deleted file mode 100644 index 73e0ea87c1f5..000000000000 --- a/net/ipv4/ipvs/ip_vs_proto_ah.c +++ /dev/null @@ -1,178 +0,0 @@ -/* - * ip_vs_proto_ah.c: AH IPSec load balancing support for IPVS - * - * Authors: Julian Anastasov <ja@ssi.bg>, February 2002 - * Wensong Zhang <wensong@linuxvirtualserver.org> - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * version 2 as published by the Free Software Foundation; - * - */ - -#include <linux/in.h> -#include <linux/ip.h> -#include <linux/module.h> -#include <linux/kernel.h> -#include <linux/netfilter.h> -#include <linux/netfilter_ipv4.h> - -#include <net/ip_vs.h> - - -/* TODO: - -struct isakmp_hdr { - __u8 icookie[8]; - __u8 rcookie[8]; - __u8 np; - __u8 version; - __u8 xchgtype; - __u8 flags; - __u32 msgid; - __u32 length; -}; - -*/ - -#define PORT_ISAKMP 500 - - -static struct ip_vs_conn * -ah_conn_in_get(const struct sk_buff *skb, - struct ip_vs_protocol *pp, - const struct iphdr *iph, - unsigned int proto_off, - int inverse) -{ - struct ip_vs_conn *cp; - - if (likely(!inverse)) { - cp = ip_vs_conn_in_get(IPPROTO_UDP, - iph->saddr, - htons(PORT_ISAKMP), - iph->daddr, - htons(PORT_ISAKMP)); - } else { - cp = ip_vs_conn_in_get(IPPROTO_UDP, - iph->daddr, - htons(PORT_ISAKMP), - iph->saddr, - htons(PORT_ISAKMP)); - } - - if (!cp) { - /* - * We are not sure if the packet is from our - * service, so our conn_schedule hook should return NF_ACCEPT - */ - IP_VS_DBG(12, "Unknown ISAKMP entry for outin packet " - "%s%s %u.%u.%u.%u->%u.%u.%u.%u\n", - inverse ? "ICMP+" : "", - pp->name, - NIPQUAD(iph->saddr), - NIPQUAD(iph->daddr)); - } - - return cp; -} - - -static struct ip_vs_conn * -ah_conn_out_get(const struct sk_buff *skb, struct ip_vs_protocol *pp, - const struct iphdr *iph, unsigned int proto_off, int inverse) -{ - struct ip_vs_conn *cp; - - if (likely(!inverse)) { - cp = ip_vs_conn_out_get(IPPROTO_UDP, - iph->saddr, - htons(PORT_ISAKMP), - iph->daddr, - htons(PORT_ISAKMP)); - } else { - cp = ip_vs_conn_out_get(IPPROTO_UDP, - iph->daddr, - htons(PORT_ISAKMP), - iph->saddr, - htons(PORT_ISAKMP)); - } - - if (!cp) { - IP_VS_DBG(12, "Unknown ISAKMP entry for inout packet " - "%s%s %u.%u.%u.%u->%u.%u.%u.%u\n", - inverse ? "ICMP+" : "", - pp->name, - NIPQUAD(iph->saddr), - NIPQUAD(iph->daddr)); - } - - return cp; -} - - -static int -ah_conn_schedule(struct sk_buff *skb, - struct ip_vs_protocol *pp, - int *verdict, struct ip_vs_conn **cpp) -{ - /* - * AH is only related traffic. Pass the packet to IP stack. - */ - *verdict = NF_ACCEPT; - return 0; -} - - -static void -ah_debug_packet(struct ip_vs_protocol *pp, const struct sk_buff *skb, - int offset, const char *msg) -{ - char buf[256]; - struct iphdr _iph, *ih; - - ih = skb_header_pointer(skb, offset, sizeof(_iph), &_iph); - if (ih == NULL) - sprintf(buf, "%s TRUNCATED", pp->name); - else - sprintf(buf, "%s %u.%u.%u.%u->%u.%u.%u.%u", - pp->name, NIPQUAD(ih->saddr), - NIPQUAD(ih->daddr)); - - printk(KERN_DEBUG "IPVS: %s: %s\n", msg, buf); -} - - -static void ah_init(struct ip_vs_protocol *pp) -{ - /* nothing to do now */ -} - - -static void ah_exit(struct ip_vs_protocol *pp) -{ - /* nothing to do now */ -} - - -struct ip_vs_protocol ip_vs_protocol_ah = { - .name = "AH", - .protocol = IPPROTO_AH, - .num_states = 1, - .dont_defrag = 1, - .init = ah_init, - .exit = ah_exit, - .conn_schedule = ah_conn_schedule, - .conn_in_get = ah_conn_in_get, - .conn_out_get = ah_conn_out_get, - .snat_handler = NULL, - .dnat_handler = NULL, - .csum_check = NULL, - .state_transition = NULL, - .register_app = NULL, - .unregister_app = NULL, - .app_conn_bind = NULL, - .debug_packet = ah_debug_packet, - .timeout_change = NULL, /* ISAKMP */ - .set_state_timeout = NULL, -}; diff --git a/net/ipv4/ipvs/ip_vs_proto_esp.c b/net/ipv4/ipvs/ip_vs_proto_esp.c deleted file mode 100644 index 21d70c8ffa54..000000000000 --- a/net/ipv4/ipvs/ip_vs_proto_esp.c +++ /dev/null @@ -1,176 +0,0 @@ -/* - * ip_vs_proto_esp.c: ESP IPSec load balancing support for IPVS - * - * Authors: Julian Anastasov <ja@ssi.bg>, February 2002 - * Wensong Zhang <wensong@linuxvirtualserver.org> - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * version 2 as published by the Free Software Foundation; - * - */ - -#include <linux/in.h> -#include <linux/ip.h> -#include <linux/module.h> -#include <linux/kernel.h> -#include <linux/netfilter.h> -#include <linux/netfilter_ipv4.h> - -#include <net/ip_vs.h> - - -/* TODO: - -struct isakmp_hdr { - __u8 icookie[8]; - __u8 rcookie[8]; - __u8 np; - __u8 version; - __u8 xchgtype; - __u8 flags; - __u32 msgid; - __u32 length; -}; - -*/ - -#define PORT_ISAKMP 500 - - -static struct ip_vs_conn * -esp_conn_in_get(const struct sk_buff *skb, - struct ip_vs_protocol *pp, - const struct iphdr *iph, - unsigned int proto_off, - int inverse) -{ - struct ip_vs_conn *cp; - - if (likely(!inverse)) { - cp = ip_vs_conn_in_get(IPPROTO_UDP, - iph->saddr, - htons(PORT_ISAKMP), - iph->daddr, - htons(PORT_ISAKMP)); - } else { - cp = ip_vs_conn_in_get(IPPROTO_UDP, - iph->daddr, - htons(PORT_ISAKMP), - iph->saddr, - htons(PORT_ISAKMP)); - } - - if (!cp) { - /* - * We are not sure if the packet is from our - * service, so our conn_schedule hook should return NF_ACCEPT - */ - IP_VS_DBG(12, "Unknown ISAKMP entry for outin packet " - "%s%s %u.%u.%u.%u->%u.%u.%u.%u\n", - inverse ? "ICMP+" : "", - pp->name, - NIPQUAD(iph->saddr), - NIPQUAD(iph->daddr)); - } - - return cp; -} - - -static struct ip_vs_conn * -esp_conn_out_get(const struct sk_buff *skb, struct ip_vs_protocol *pp, - const struct iphdr *iph, unsigned int proto_off, int inverse) -{ - struct ip_vs_conn *cp; - - if (likely(!inverse)) { - cp = ip_vs_conn_out_get(IPPROTO_UDP, - iph->saddr, - htons(PORT_ISAKMP), - iph->daddr, - htons(PORT_ISAKMP)); - } else { - cp = ip_vs_conn_out_get(IPPROTO_UDP, - iph->daddr, - htons(PORT_ISAKMP), - iph->saddr, - htons(PORT_ISAKMP)); - } - - if (!cp) { - IP_VS_DBG(12, "Unknown ISAKMP entry for inout packet " - "%s%s %u.%u.%u.%u->%u.%u.%u.%u\n", - inverse ? "ICMP+" : "", - pp->name, - NIPQUAD(iph->saddr), - NIPQUAD(iph->daddr)); - } - - return cp; -} - - -static int -esp_conn_schedule(struct sk_buff *skb, struct ip_vs_protocol *pp, - int *verdict, struct ip_vs_conn **cpp) -{ - /* - * ESP is only related traffic. Pass the packet to IP stack. - */ - *verdict = NF_ACCEPT; - return 0; -} - - -static void -esp_debug_packet(struct ip_vs_protocol *pp, const struct sk_buff *skb, - int offset, const char *msg) -{ - char buf[256]; - struct iphdr _iph, *ih; - - ih = skb_header_pointer(skb, offset, sizeof(_iph), &_iph); - if (ih == NULL) - sprintf(buf, "%s TRUNCATED", pp->name); - else - sprintf(buf, "%s %u.%u.%u.%u->%u.%u.%u.%u", - pp->name, NIPQUAD(ih->saddr), - NIPQUAD(ih->daddr)); - - printk(KERN_DEBUG "IPVS: %s: %s\n", msg, buf); -} - - -static void esp_init(struct ip_vs_protocol *pp) -{ - /* nothing to do now */ -} - - -static void esp_exit(struct ip_vs_protocol *pp) -{ - /* nothing to do now */ -} - - -struct ip_vs_protocol ip_vs_protocol_esp = { - .name = "ESP", - .protocol = IPPROTO_ESP, - .num_states = 1, - .dont_defrag = 1, - .init = esp_init, - .exit = esp_exit, - .conn_schedule = esp_conn_schedule, - .conn_in_get = esp_conn_in_get, - .conn_out_get = esp_conn_out_get, - .snat_handler = NULL, - .dnat_handler = NULL, - .csum_check = NULL, - .state_transition = NULL, - .register_app = NULL, - .unregister_app = NULL, - .app_conn_bind = NULL, - .debug_packet = esp_debug_packet, - .timeout_change = NULL, /* ISAKMP */ -}; diff --git a/net/ipv4/netfilter.c b/net/ipv4/netfilter.c index f8edacdf991d..6efdb70b3eb2 100644 --- a/net/ipv4/netfilter.c +++ b/net/ipv4/netfilter.c @@ -12,6 +12,7 @@ /* route_me_harder function, used by iptable_nat, iptable_mangle + ip_queue */ int ip_route_me_harder(struct sk_buff *skb, unsigned addr_type) { + struct net *net = dev_net(skb->dst->dev); const struct iphdr *iph = ip_hdr(skb); struct rtable *rt; struct flowi fl = {}; @@ -19,7 +20,9 @@ int ip_route_me_harder(struct sk_buff *skb, unsigned addr_type) unsigned int hh_len; unsigned int type; - type = inet_addr_type(&init_net, iph->saddr); + type = inet_addr_type(net, iph->saddr); + if (skb->sk && inet_sk(skb->sk)->transparent) + type = RTN_LOCAL; if (addr_type == RTN_UNSPEC) addr_type = type; @@ -33,7 +36,8 @@ int ip_route_me_harder(struct sk_buff *skb, unsigned addr_type) fl.nl_u.ip4_u.tos = RT_TOS(iph->tos); fl.oif = skb->sk ? skb->sk->sk_bound_dev_if : 0; fl.mark = skb->mark; - if (ip_route_output_key(&init_net, &rt, &fl) != 0) + fl.flags = skb->sk ? inet_sk_flowi_flags(skb->sk) : 0; + if (ip_route_output_key(net, &rt, &fl) != 0) return -1; /* Drop old route. */ @@ -43,7 +47,7 @@ int ip_route_me_harder(struct sk_buff *skb, unsigned addr_type) /* non-local src, find valid iif to satisfy * rp-filter when calling ip_route_input. */ fl.nl_u.ip4_u.daddr = iph->saddr; - if (ip_route_output_key(&init_net, &rt, &fl) != 0) + if (ip_route_output_key(net, &rt, &fl) != 0) return -1; odst = skb->dst; diff --git a/net/ipv4/netfilter/Kconfig b/net/ipv4/netfilter/Kconfig index 90eb7cb47e77..3816e1dc9295 100644 --- a/net/ipv4/netfilter/Kconfig +++ b/net/ipv4/netfilter/Kconfig @@ -5,10 +5,15 @@ menu "IP: Netfilter Configuration" depends on INET && NETFILTER +config NF_DEFRAG_IPV4 + tristate + default n + config NF_CONNTRACK_IPV4 tristate "IPv4 connection tracking support (required for NAT)" depends on NF_CONNTRACK default m if NETFILTER_ADVANCED=n + select NF_DEFRAG_IPV4 ---help--- Connection tracking keeps a record of what packets have passed through your machine, in order to figure out how they are related @@ -56,23 +61,30 @@ config IP_NF_IPTABLES To compile it as a module, choose M here. If unsure, say N. +if IP_NF_IPTABLES + # The matches. -config IP_NF_MATCH_RECENT - tristate '"recent" match support' - depends on IP_NF_IPTABLES +config IP_NF_MATCH_ADDRTYPE + tristate '"addrtype" address type match support' depends on NETFILTER_ADVANCED help - This match is used for creating one or many lists of recently - used addresses and then matching against that/those list(s). + This option allows you to match what routing thinks of an address, + eg. UNICAST, LOCAL, BROADCAST, ... - Short options are available by using 'iptables -m recent -h' - Official Website: <http://snowman.net/projects/ipt_recent/> + If you want to compile it as a module, say M here and read + <file:Documentation/kbuild/modules.txt>. If unsure, say `N'. + +config IP_NF_MATCH_AH + tristate '"ah" match support' + depends on NETFILTER_ADVANCED + help + This match extension allows you to match a range of SPIs + inside AH header of IPSec packets. To compile it as a module, choose M here. If unsure, say N. config IP_NF_MATCH_ECN tristate '"ecn" match support' - depends on IP_NF_IPTABLES depends on NETFILTER_ADVANCED help This option adds a `ECN' match, which allows you to match against @@ -80,19 +92,8 @@ config IP_NF_MATCH_ECN To compile it as a module, choose M here. If unsure, say N. -config IP_NF_MATCH_AH - tristate '"ah" match support' - depends on IP_NF_IPTABLES - depends on NETFILTER_ADVANCED - help - This match extension allows you to match a range of SPIs - inside AH header of IPSec packets. - - To compile it as a module, choose M here. If unsure, say N. - config IP_NF_MATCH_TTL tristate '"ttl" match support' - depends on IP_NF_IPTABLES depends on NETFILTER_ADVANCED help This adds CONFIG_IP_NF_MATCH_TTL option, which enabled the user @@ -100,21 +101,9 @@ config IP_NF_MATCH_TTL To compile it as a module, choose M here. If unsure, say N. -config IP_NF_MATCH_ADDRTYPE - tristate '"addrtype" address type match support' - depends on IP_NF_IPTABLES - depends on NETFILTER_ADVANCED - help - This option allows you to match what routing thinks of an address, - eg. UNICAST, LOCAL, BROADCAST, ... - - If you want to compile it as a module, say M here and read - <file:Documentation/kbuild/modules.txt>. If unsure, say `N'. - # `filter', generic and specific targets config IP_NF_FILTER tristate "Packet filtering" - depends on IP_NF_IPTABLES default m if NETFILTER_ADVANCED=n help Packet filtering defines a table `filter', which has a series of @@ -136,7 +125,6 @@ config IP_NF_TARGET_REJECT config IP_NF_TARGET_LOG tristate "LOG target support" - depends on IP_NF_IPTABLES default m if NETFILTER_ADVANCED=n help This option adds a `LOG' target, which allows you to create rules in @@ -146,7 +134,6 @@ config IP_NF_TARGET_LOG config IP_NF_TARGET_ULOG tristate "ULOG target support" - depends on IP_NF_IPTABLES default m if NETFILTER_ADVANCED=n ---help--- @@ -167,7 +154,7 @@ config IP_NF_TARGET_ULOG # NAT + specific targets: nf_conntrack config NF_NAT tristate "Full NAT" - depends on IP_NF_IPTABLES && NF_CONNTRACK_IPV4 + depends on NF_CONNTRACK_IPV4 default m if NETFILTER_ADVANCED=n help The Full NAT option allows masquerading, port forwarding and other @@ -194,26 +181,26 @@ config IP_NF_TARGET_MASQUERADE To compile it as a module, choose M here. If unsure, say N. -config IP_NF_TARGET_REDIRECT - tristate "REDIRECT target support" +config IP_NF_TARGET_NETMAP + tristate "NETMAP target support" depends on NF_NAT depends on NETFILTER_ADVANCED help - REDIRECT is a special case of NAT: all incoming connections are - mapped onto the incoming interface's address, causing the packets to - come to the local machine instead of passing through. This is - useful for transparent proxies. + NETMAP is an implementation of static 1:1 NAT mapping of network + addresses. It maps the network address part, while keeping the host + address part intact. To compile it as a module, choose M here. If unsure, say N. -config IP_NF_TARGET_NETMAP - tristate "NETMAP target support" +config IP_NF_TARGET_REDIRECT + tristate "REDIRECT target support" depends on NF_NAT depends on NETFILTER_ADVANCED help - NETMAP is an implementation of static 1:1 NAT mapping of network - addresses. It maps the network address part, while keeping the host - address part intact. + REDIRECT is a special case of NAT: all incoming connections are + mapped onto the incoming interface's address, causing the packets to + come to the local machine instead of passing through. This is + useful for transparent proxies. To compile it as a module, choose M here. If unsure, say N. @@ -262,44 +249,43 @@ config NF_NAT_PROTO_SCTP config NF_NAT_FTP tristate - depends on IP_NF_IPTABLES && NF_CONNTRACK && NF_NAT + depends on NF_CONNTRACK && NF_NAT default NF_NAT && NF_CONNTRACK_FTP config NF_NAT_IRC tristate - depends on IP_NF_IPTABLES && NF_CONNTRACK && NF_NAT + depends on NF_CONNTRACK && NF_NAT default NF_NAT && NF_CONNTRACK_IRC config NF_NAT_TFTP tristate - depends on IP_NF_IPTABLES && NF_CONNTRACK && NF_NAT + depends on NF_CONNTRACK && NF_NAT default NF_NAT && NF_CONNTRACK_TFTP config NF_NAT_AMANDA tristate - depends on IP_NF_IPTABLES && NF_CONNTRACK && NF_NAT + depends on NF_CONNTRACK && NF_NAT default NF_NAT && NF_CONNTRACK_AMANDA config NF_NAT_PPTP tristate - depends on IP_NF_IPTABLES && NF_CONNTRACK && NF_NAT + depends on NF_CONNTRACK && NF_NAT default NF_NAT && NF_CONNTRACK_PPTP select NF_NAT_PROTO_GRE config NF_NAT_H323 tristate - depends on IP_NF_IPTABLES && NF_CONNTRACK && NF_NAT + depends on NF_CONNTRACK && NF_NAT default NF_NAT && NF_CONNTRACK_H323 config NF_NAT_SIP tristate - depends on IP_NF_IPTABLES && NF_CONNTRACK && NF_NAT + depends on NF_CONNTRACK && NF_NAT default NF_NAT && NF_CONNTRACK_SIP # mangle + specific targets config IP_NF_MANGLE tristate "Packet mangling" - depends on IP_NF_IPTABLES default m if NETFILTER_ADVANCED=n help This option adds a `mangle' table to iptables: see the man page for @@ -308,6 +294,19 @@ config IP_NF_MANGLE To compile it as a module, choose M here. If unsure, say N. +config IP_NF_TARGET_CLUSTERIP + tristate "CLUSTERIP target support (EXPERIMENTAL)" + depends on IP_NF_MANGLE && EXPERIMENTAL + depends on NF_CONNTRACK_IPV4 + depends on NETFILTER_ADVANCED + select NF_CONNTRACK_MARK + help + The CLUSTERIP target allows you to build load-balancing clusters of + network servers without having a dedicated load-balancing + router/server/switch. + + To compile it as a module, choose M here. If unsure, say N. + config IP_NF_TARGET_ECN tristate "ECN target support" depends on IP_NF_MANGLE @@ -338,23 +337,9 @@ config IP_NF_TARGET_TTL To compile it as a module, choose M here. If unsure, say N. -config IP_NF_TARGET_CLUSTERIP - tristate "CLUSTERIP target support (EXPERIMENTAL)" - depends on IP_NF_MANGLE && EXPERIMENTAL - depends on NF_CONNTRACK_IPV4 - depends on NETFILTER_ADVANCED - select NF_CONNTRACK_MARK - help - The CLUSTERIP target allows you to build load-balancing clusters of - network servers without having a dedicated load-balancing - router/server/switch. - - To compile it as a module, choose M here. If unsure, say N. - # raw + specific targets config IP_NF_RAW tristate 'raw table support (required for NOTRACK/TRACE)' - depends on IP_NF_IPTABLES depends on NETFILTER_ADVANCED help This option adds a `raw' table to iptables. This table is the very @@ -367,7 +352,6 @@ config IP_NF_RAW # security table for MAC policy config IP_NF_SECURITY tristate "Security table" - depends on IP_NF_IPTABLES depends on SECURITY depends on NETFILTER_ADVANCED help @@ -376,6 +360,8 @@ config IP_NF_SECURITY If unsure, say N. +endif # IP_NF_IPTABLES + # ARP tables config IP_NF_ARPTABLES tristate "ARP tables support" @@ -388,9 +374,10 @@ config IP_NF_ARPTABLES To compile it as a module, choose M here. If unsure, say N. +if IP_NF_ARPTABLES + config IP_NF_ARPFILTER tristate "ARP packet filtering" - depends on IP_NF_ARPTABLES help ARP packet filtering defines a table `filter', which has a series of rules for simple ARP packet filtering at local input and @@ -401,10 +388,11 @@ config IP_NF_ARPFILTER config IP_NF_ARP_MANGLE tristate "ARP payload mangling" - depends on IP_NF_ARPTABLES help Allows altering the ARP packet payload: source and destination hardware and network addresses. +endif # IP_NF_ARPTABLES + endmenu diff --git a/net/ipv4/netfilter/Makefile b/net/ipv4/netfilter/Makefile index 3f31291f37ce..5f9b650d90fc 100644 --- a/net/ipv4/netfilter/Makefile +++ b/net/ipv4/netfilter/Makefile @@ -18,6 +18,9 @@ obj-$(CONFIG_NF_CONNTRACK_IPV4) += nf_conntrack_ipv4.o obj-$(CONFIG_NF_NAT) += nf_nat.o +# defrag +obj-$(CONFIG_NF_DEFRAG_IPV4) += nf_defrag_ipv4.o + # NAT helpers (nf_conntrack) obj-$(CONFIG_NF_NAT_AMANDA) += nf_nat_amanda.o obj-$(CONFIG_NF_NAT_FTP) += nf_nat_ftp.o @@ -48,7 +51,6 @@ obj-$(CONFIG_IP_NF_SECURITY) += iptable_security.o obj-$(CONFIG_IP_NF_MATCH_ADDRTYPE) += ipt_addrtype.o obj-$(CONFIG_IP_NF_MATCH_AH) += ipt_ah.o obj-$(CONFIG_IP_NF_MATCH_ECN) += ipt_ecn.o -obj-$(CONFIG_IP_NF_MATCH_RECENT) += ipt_recent.o obj-$(CONFIG_IP_NF_MATCH_TTL) += ipt_ttl.o # targets diff --git a/net/ipv4/netfilter/arp_tables.c b/net/ipv4/netfilter/arp_tables.c index 03e83a65aec5..8d70d29f1ccf 100644 --- a/net/ipv4/netfilter/arp_tables.c +++ b/net/ipv4/netfilter/arp_tables.c @@ -200,15 +200,12 @@ static inline int arp_checkentry(const struct arpt_arp *arp) return 1; } -static unsigned int arpt_error(struct sk_buff *skb, - const struct net_device *in, - const struct net_device *out, - unsigned int hooknum, - const struct xt_target *target, - const void *targinfo) +static unsigned int +arpt_error(struct sk_buff *skb, const struct xt_target_param *par) { if (net_ratelimit()) - printk("arp_tables: error: '%s'\n", (char *)targinfo); + printk("arp_tables: error: '%s'\n", + (const char *)par->targinfo); return NF_DROP; } @@ -232,6 +229,7 @@ unsigned int arpt_do_table(struct sk_buff *skb, const char *indev, *outdev; void *table_base; const struct xt_table_info *private; + struct xt_target_param tgpar; if (!pskb_may_pull(skb, arp_hdr_len(skb->dev))) return NF_DROP; @@ -245,6 +243,11 @@ unsigned int arpt_do_table(struct sk_buff *skb, e = get_entry(table_base, private->hook_entry[hook]); back = get_entry(table_base, private->underflow[hook]); + tgpar.in = in; + tgpar.out = out; + tgpar.hooknum = hook; + tgpar.family = NFPROTO_ARP; + arp = arp_hdr(skb); do { if (arp_packet_match(arp, skb->dev, indev, outdev, &e->arp)) { @@ -290,11 +293,10 @@ unsigned int arpt_do_table(struct sk_buff *skb, /* Targets which reenter must return * abs. verdicts */ + tgpar.target = t->u.kernel.target; + tgpar.targinfo = t->data; verdict = t->u.kernel.target->target(skb, - in, out, - hook, - t->u.kernel.target, - t->data); + &tgpar); /* Target might have changed stuff. */ arp = arp_hdr(skb); @@ -456,23 +458,24 @@ static inline int check_entry(struct arpt_entry *e, const char *name) static inline int check_target(struct arpt_entry *e, const char *name) { - struct arpt_entry_target *t; - struct xt_target *target; + struct arpt_entry_target *t = arpt_get_target(e); int ret; - - t = arpt_get_target(e); - target = t->u.kernel.target; - - ret = xt_check_target(target, NF_ARP, t->u.target_size - sizeof(*t), - name, e->comefrom, 0, 0); - if (!ret && t->u.kernel.target->checkentry - && !t->u.kernel.target->checkentry(name, e, target, t->data, - e->comefrom)) { + struct xt_tgchk_param par = { + .table = name, + .entryinfo = e, + .target = t->u.kernel.target, + .targinfo = t->data, + .hook_mask = e->comefrom, + .family = NFPROTO_ARP, + }; + + ret = xt_check_target(&par, t->u.target_size - sizeof(*t), 0, false); + if (ret < 0) { duprintf("arp_tables: check failed for `%s'.\n", t->u.kernel.target->name); - ret = -EINVAL; + return ret; } - return ret; + return 0; } static inline int @@ -488,7 +491,8 @@ find_check_entry(struct arpt_entry *e, const char *name, unsigned int size, return ret; t = arpt_get_target(e); - target = try_then_request_module(xt_find_target(NF_ARP, t->u.user.name, + target = try_then_request_module(xt_find_target(NFPROTO_ARP, + t->u.user.name, t->u.user.revision), "arpt_%s", t->u.user.name); if (IS_ERR(target) || !target) { @@ -554,15 +558,19 @@ static inline int check_entry_size_and_hooks(struct arpt_entry *e, static inline int cleanup_entry(struct arpt_entry *e, unsigned int *i) { + struct xt_tgdtor_param par; struct arpt_entry_target *t; if (i && (*i)-- == 0) return 1; t = arpt_get_target(e); - if (t->u.kernel.target->destroy) - t->u.kernel.target->destroy(t->u.kernel.target, t->data); - module_put(t->u.kernel.target->me); + par.target = t->u.kernel.target; + par.targinfo = t->data; + par.family = NFPROTO_ARP; + if (par.target->destroy != NULL) + par.target->destroy(&par); + module_put(par.target->me); return 0; } @@ -788,7 +796,7 @@ static void compat_standard_from_user(void *dst, void *src) int v = *(compat_int_t *)src; if (v > 0) - v += xt_compat_calc_jump(NF_ARP, v); + v += xt_compat_calc_jump(NFPROTO_ARP, v); memcpy(dst, &v, sizeof(v)); } @@ -797,7 +805,7 @@ static int compat_standard_to_user(void __user *dst, void *src) compat_int_t cv = *(int *)src; if (cv > 0) - cv -= xt_compat_calc_jump(NF_ARP, cv); + cv -= xt_compat_calc_jump(NFPROTO_ARP, cv); return copy_to_user(dst, &cv, sizeof(cv)) ? -EFAULT : 0; } @@ -815,7 +823,7 @@ static int compat_calc_entry(struct arpt_entry *e, t = arpt_get_target(e); off += xt_compat_target_offset(t->u.kernel.target); newinfo->size -= off; - ret = xt_compat_add_offset(NF_ARP, entry_offset, off); + ret = xt_compat_add_offset(NFPROTO_ARP, entry_offset, off); if (ret) return ret; @@ -866,9 +874,9 @@ static int get_info(struct net *net, void __user *user, int *len, int compat) name[ARPT_TABLE_MAXNAMELEN-1] = '\0'; #ifdef CONFIG_COMPAT if (compat) - xt_compat_lock(NF_ARP); + xt_compat_lock(NFPROTO_ARP); #endif - t = try_then_request_module(xt_find_table_lock(net, NF_ARP, name), + t = try_then_request_module(xt_find_table_lock(net, NFPROTO_ARP, name), "arptable_%s", name); if (t && !IS_ERR(t)) { struct arpt_getinfo info; @@ -878,7 +886,7 @@ static int get_info(struct net *net, void __user *user, int *len, int compat) if (compat) { struct xt_table_info tmp; ret = compat_table_info(private, &tmp); - xt_compat_flush_offsets(NF_ARP); + xt_compat_flush_offsets(NFPROTO_ARP); private = &tmp; } #endif @@ -901,7 +909,7 @@ static int get_info(struct net *net, void __user *user, int *len, int compat) ret = t ? PTR_ERR(t) : -ENOENT; #ifdef CONFIG_COMPAT if (compat) - xt_compat_unlock(NF_ARP); + xt_compat_unlock(NFPROTO_ARP); #endif return ret; } @@ -925,7 +933,7 @@ static int get_entries(struct net *net, struct arpt_get_entries __user *uptr, return -EINVAL; } - t = xt_find_table_lock(net, NF_ARP, get.name); + t = xt_find_table_lock(net, NFPROTO_ARP, get.name); if (t && !IS_ERR(t)) { const struct xt_table_info *private = t->private; @@ -967,7 +975,7 @@ static int __do_replace(struct net *net, const char *name, goto out; } - t = try_then_request_module(xt_find_table_lock(net, NF_ARP, name), + t = try_then_request_module(xt_find_table_lock(net, NFPROTO_ARP, name), "arptable_%s", name); if (!t || IS_ERR(t)) { ret = t ? PTR_ERR(t) : -ENOENT; @@ -1134,7 +1142,7 @@ static int do_add_counters(struct net *net, void __user *user, unsigned int len, goto free; } - t = xt_find_table_lock(net, NF_ARP, name); + t = xt_find_table_lock(net, NFPROTO_ARP, name); if (!t || IS_ERR(t)) { ret = t ? PTR_ERR(t) : -ENOENT; goto free; @@ -1218,7 +1226,7 @@ check_compat_entry_size_and_hooks(struct compat_arpt_entry *e, entry_offset = (void *)e - (void *)base; t = compat_arpt_get_target(e); - target = try_then_request_module(xt_find_target(NF_ARP, + target = try_then_request_module(xt_find_target(NFPROTO_ARP, t->u.user.name, t->u.user.revision), "arpt_%s", t->u.user.name); @@ -1232,7 +1240,7 @@ check_compat_entry_size_and_hooks(struct compat_arpt_entry *e, off += xt_compat_target_offset(target); *size += off; - ret = xt_compat_add_offset(NF_ARP, entry_offset, off); + ret = xt_compat_add_offset(NFPROTO_ARP, entry_offset, off); if (ret) goto release_target; @@ -1333,7 +1341,7 @@ static int translate_compat_table(const char *name, duprintf("translate_compat_table: size %u\n", info->size); j = 0; - xt_compat_lock(NF_ARP); + xt_compat_lock(NFPROTO_ARP); /* Walk through entries, checking offsets. */ ret = COMPAT_ARPT_ENTRY_ITERATE(entry0, total_size, check_compat_entry_size_and_hooks, @@ -1383,8 +1391,8 @@ static int translate_compat_table(const char *name, ret = COMPAT_ARPT_ENTRY_ITERATE(entry0, total_size, compat_copy_entry_from_user, &pos, &size, name, newinfo, entry1); - xt_compat_flush_offsets(NF_ARP); - xt_compat_unlock(NF_ARP); + xt_compat_flush_offsets(NFPROTO_ARP); + xt_compat_unlock(NFPROTO_ARP); if (ret) goto free_newinfo; @@ -1420,8 +1428,8 @@ out: COMPAT_ARPT_ENTRY_ITERATE(entry0, total_size, compat_release_entry, &j); return ret; out_unlock: - xt_compat_flush_offsets(NF_ARP); - xt_compat_unlock(NF_ARP); + xt_compat_flush_offsets(NFPROTO_ARP); + xt_compat_unlock(NFPROTO_ARP); goto out; } @@ -1607,8 +1615,8 @@ static int compat_get_entries(struct net *net, return -EINVAL; } - xt_compat_lock(NF_ARP); - t = xt_find_table_lock(net, NF_ARP, get.name); + xt_compat_lock(NFPROTO_ARP); + t = xt_find_table_lock(net, NFPROTO_ARP, get.name); if (t && !IS_ERR(t)) { const struct xt_table_info *private = t->private; struct xt_table_info info; @@ -1623,13 +1631,13 @@ static int compat_get_entries(struct net *net, private->size, get.size); ret = -EAGAIN; } - xt_compat_flush_offsets(NF_ARP); + xt_compat_flush_offsets(NFPROTO_ARP); module_put(t->me); xt_table_unlock(t); } else ret = t ? PTR_ERR(t) : -ENOENT; - xt_compat_unlock(NF_ARP); + xt_compat_unlock(NFPROTO_ARP); return ret; } @@ -1709,7 +1717,7 @@ static int do_arpt_get_ctl(struct sock *sk, int cmd, void __user *user, int *len break; } - try_then_request_module(xt_find_revision(NF_ARP, rev.name, + try_then_request_module(xt_find_revision(NFPROTO_ARP, rev.name, rev.revision, 1, &ret), "arpt_%s", rev.name); break; @@ -1787,7 +1795,7 @@ void arpt_unregister_table(struct xt_table *table) static struct xt_target arpt_standard_target __read_mostly = { .name = ARPT_STANDARD_TARGET, .targetsize = sizeof(int), - .family = NF_ARP, + .family = NFPROTO_ARP, #ifdef CONFIG_COMPAT .compatsize = sizeof(compat_int_t), .compat_from_user = compat_standard_from_user, @@ -1799,7 +1807,7 @@ static struct xt_target arpt_error_target __read_mostly = { .name = ARPT_ERROR_TARGET, .target = arpt_error, .targetsize = ARPT_FUNCTION_MAXNAMELEN, - .family = NF_ARP, + .family = NFPROTO_ARP, }; static struct nf_sockopt_ops arpt_sockopts = { @@ -1821,12 +1829,12 @@ static struct nf_sockopt_ops arpt_sockopts = { static int __net_init arp_tables_net_init(struct net *net) { - return xt_proto_init(net, NF_ARP); + return xt_proto_init(net, NFPROTO_ARP); } static void __net_exit arp_tables_net_exit(struct net *net) { - xt_proto_fini(net, NF_ARP); + xt_proto_fini(net, NFPROTO_ARP); } static struct pernet_operations arp_tables_net_ops = { diff --git a/net/ipv4/netfilter/arpt_mangle.c b/net/ipv4/netfilter/arpt_mangle.c index a385959d2655..b0d5b1d0a769 100644 --- a/net/ipv4/netfilter/arpt_mangle.c +++ b/net/ipv4/netfilter/arpt_mangle.c @@ -9,12 +9,9 @@ MODULE_AUTHOR("Bart De Schuymer <bdschuym@pandora.be>"); MODULE_DESCRIPTION("arptables arp payload mangle target"); static unsigned int -target(struct sk_buff *skb, - const struct net_device *in, const struct net_device *out, - unsigned int hooknum, const struct xt_target *target, - const void *targinfo) +target(struct sk_buff *skb, const struct xt_target_param *par) { - const struct arpt_mangle *mangle = targinfo; + const struct arpt_mangle *mangle = par->targinfo; const struct arphdr *arp; unsigned char *arpptr; int pln, hln; @@ -57,11 +54,9 @@ target(struct sk_buff *skb, return mangle->target; } -static bool -checkentry(const char *tablename, const void *e, const struct xt_target *target, - void *targinfo, unsigned int hook_mask) +static bool checkentry(const struct xt_tgchk_param *par) { - const struct arpt_mangle *mangle = targinfo; + const struct arpt_mangle *mangle = par->targinfo; if (mangle->flags & ~ARPT_MANGLE_MASK || !(mangle->flags & ARPT_MANGLE_MASK)) @@ -75,7 +70,7 @@ checkentry(const char *tablename, const void *e, const struct xt_target *target, static struct xt_target arpt_mangle_reg __read_mostly = { .name = "mangle", - .family = NF_ARP, + .family = NFPROTO_ARP, .target = target, .targetsize = sizeof(struct arpt_mangle), .checkentry = checkentry, diff --git a/net/ipv4/netfilter/arptable_filter.c b/net/ipv4/netfilter/arptable_filter.c index 082f5dd3156c..bee3d117661a 100644 --- a/net/ipv4/netfilter/arptable_filter.c +++ b/net/ipv4/netfilter/arptable_filter.c @@ -51,7 +51,7 @@ static struct xt_table packet_filter = { .lock = __RW_LOCK_UNLOCKED(packet_filter.lock), .private = NULL, .me = THIS_MODULE, - .af = NF_ARP, + .af = NFPROTO_ARP, }; /* The work comes in here from netfilter.c */ @@ -89,21 +89,21 @@ static struct nf_hook_ops arpt_ops[] __read_mostly = { { .hook = arpt_in_hook, .owner = THIS_MODULE, - .pf = NF_ARP, + .pf = NFPROTO_ARP, .hooknum = NF_ARP_IN, .priority = NF_IP_PRI_FILTER, }, { .hook = arpt_out_hook, .owner = THIS_MODULE, - .pf = NF_ARP, + .pf = NFPROTO_ARP, .hooknum = NF_ARP_OUT, .priority = NF_IP_PRI_FILTER, }, { .hook = arpt_forward_hook, .owner = THIS_MODULE, - .pf = NF_ARP, + .pf = NFPROTO_ARP, .hooknum = NF_ARP_FORWARD, .priority = NF_IP_PRI_FILTER, }, diff --git a/net/ipv4/netfilter/ip_tables.c b/net/ipv4/netfilter/ip_tables.c index 4e7c719445c2..213fb27debc1 100644 --- a/net/ipv4/netfilter/ip_tables.c +++ b/net/ipv4/netfilter/ip_tables.c @@ -171,31 +171,25 @@ ip_checkentry(const struct ipt_ip *ip) } static unsigned int -ipt_error(struct sk_buff *skb, - const struct net_device *in, - const struct net_device *out, - unsigned int hooknum, - const struct xt_target *target, - const void *targinfo) +ipt_error(struct sk_buff *skb, const struct xt_target_param *par) { if (net_ratelimit()) - printk("ip_tables: error: `%s'\n", (char *)targinfo); + printk("ip_tables: error: `%s'\n", + (const char *)par->targinfo); return NF_DROP; } /* Performance critical - called for every packet */ static inline bool -do_match(struct ipt_entry_match *m, - const struct sk_buff *skb, - const struct net_device *in, - const struct net_device *out, - int offset, - bool *hotdrop) +do_match(struct ipt_entry_match *m, const struct sk_buff *skb, + struct xt_match_param *par) { + par->match = m->u.kernel.match; + par->matchinfo = m->data; + /* Stop iteration if it doesn't match */ - if (!m->u.kernel.match->match(skb, in, out, m->u.kernel.match, m->data, - offset, ip_hdrlen(skb), hotdrop)) + if (!m->u.kernel.match->match(skb, par)) return true; else return false; @@ -326,7 +320,6 @@ ipt_do_table(struct sk_buff *skb, struct xt_table *table) { static const char nulldevname[IFNAMSIZ] __attribute__((aligned(sizeof(long)))); - u_int16_t offset; const struct iphdr *ip; u_int16_t datalen; bool hotdrop = false; @@ -336,6 +329,8 @@ ipt_do_table(struct sk_buff *skb, void *table_base; struct ipt_entry *e, *back; struct xt_table_info *private; + struct xt_match_param mtpar; + struct xt_target_param tgpar; /* Initialization */ ip = ip_hdr(skb); @@ -348,7 +343,13 @@ ipt_do_table(struct sk_buff *skb, * things we don't know, ie. tcp syn flag or ports). If the * rule is also a fragment-specific rule, non-fragments won't * match it. */ - offset = ntohs(ip->frag_off) & IP_OFFSET; + mtpar.fragoff = ntohs(ip->frag_off) & IP_OFFSET; + mtpar.thoff = ip_hdrlen(skb); + mtpar.hotdrop = &hotdrop; + mtpar.in = tgpar.in = in; + mtpar.out = tgpar.out = out; + mtpar.family = tgpar.family = NFPROTO_IPV4; + tgpar.hooknum = hook; read_lock_bh(&table->lock); IP_NF_ASSERT(table->valid_hooks & (1 << hook)); @@ -362,12 +363,11 @@ ipt_do_table(struct sk_buff *skb, do { IP_NF_ASSERT(e); IP_NF_ASSERT(back); - if (ip_packet_match(ip, indev, outdev, &e->ip, offset)) { + if (ip_packet_match(ip, indev, outdev, + &e->ip, mtpar.fragoff)) { struct ipt_entry_target *t; - if (IPT_MATCH_ITERATE(e, do_match, - skb, in, out, - offset, &hotdrop) != 0) + if (IPT_MATCH_ITERATE(e, do_match, skb, &mtpar) != 0) goto no_match; ADD_COUNTER(e->counters, ntohs(ip->tot_len), 1); @@ -413,16 +413,14 @@ ipt_do_table(struct sk_buff *skb, } else { /* Targets which reenter must return abs. verdicts */ + tgpar.target = t->u.kernel.target; + tgpar.targinfo = t->data; #ifdef CONFIG_NETFILTER_DEBUG ((struct ipt_entry *)table_base)->comefrom = 0xeeeeeeec; #endif verdict = t->u.kernel.target->target(skb, - in, out, - hook, - t->u.kernel.target, - t->data); - + &tgpar); #ifdef CONFIG_NETFILTER_DEBUG if (((struct ipt_entry *)table_base)->comefrom != 0xeeeeeeec @@ -575,12 +573,17 @@ mark_source_chains(struct xt_table_info *newinfo, static int cleanup_match(struct ipt_entry_match *m, unsigned int *i) { + struct xt_mtdtor_param par; + if (i && (*i)-- == 0) return 1; - if (m->u.kernel.match->destroy) - m->u.kernel.match->destroy(m->u.kernel.match, m->data); - module_put(m->u.kernel.match->me); + par.match = m->u.kernel.match; + par.matchinfo = m->data; + par.family = NFPROTO_IPV4; + if (par.match->destroy != NULL) + par.match->destroy(&par); + module_put(par.match->me); return 0; } @@ -606,34 +609,28 @@ check_entry(struct ipt_entry *e, const char *name) } static int -check_match(struct ipt_entry_match *m, const char *name, - const struct ipt_ip *ip, - unsigned int hookmask, unsigned int *i) +check_match(struct ipt_entry_match *m, struct xt_mtchk_param *par, + unsigned int *i) { - struct xt_match *match; + const struct ipt_ip *ip = par->entryinfo; int ret; - match = m->u.kernel.match; - ret = xt_check_match(match, AF_INET, m->u.match_size - sizeof(*m), - name, hookmask, ip->proto, - ip->invflags & IPT_INV_PROTO); - if (!ret && m->u.kernel.match->checkentry - && !m->u.kernel.match->checkentry(name, ip, match, m->data, - hookmask)) { + par->match = m->u.kernel.match; + par->matchinfo = m->data; + + ret = xt_check_match(par, m->u.match_size - sizeof(*m), + ip->proto, ip->invflags & IPT_INV_PROTO); + if (ret < 0) { duprintf("ip_tables: check failed for `%s'.\n", - m->u.kernel.match->name); - ret = -EINVAL; + par.match->name); + return ret; } - if (!ret) - (*i)++; - return ret; + ++*i; + return 0; } static int -find_check_match(struct ipt_entry_match *m, - const char *name, - const struct ipt_ip *ip, - unsigned int hookmask, +find_check_match(struct ipt_entry_match *m, struct xt_mtchk_param *par, unsigned int *i) { struct xt_match *match; @@ -648,7 +645,7 @@ find_check_match(struct ipt_entry_match *m, } m->u.kernel.match = match; - ret = check_match(m, name, ip, hookmask, i); + ret = check_match(m, par, i); if (ret) goto err; @@ -660,23 +657,25 @@ err: static int check_target(struct ipt_entry *e, const char *name) { - struct ipt_entry_target *t; - struct xt_target *target; + struct ipt_entry_target *t = ipt_get_target(e); + struct xt_tgchk_param par = { + .table = name, + .entryinfo = e, + .target = t->u.kernel.target, + .targinfo = t->data, + .hook_mask = e->comefrom, + .family = NFPROTO_IPV4, + }; int ret; - t = ipt_get_target(e); - target = t->u.kernel.target; - ret = xt_check_target(target, AF_INET, t->u.target_size - sizeof(*t), - name, e->comefrom, e->ip.proto, - e->ip.invflags & IPT_INV_PROTO); - if (!ret && t->u.kernel.target->checkentry - && !t->u.kernel.target->checkentry(name, e, target, t->data, - e->comefrom)) { + ret = xt_check_target(&par, t->u.target_size - sizeof(*t), + e->ip.proto, e->ip.invflags & IPT_INV_PROTO); + if (ret < 0) { duprintf("ip_tables: check failed for `%s'.\n", t->u.kernel.target->name); - ret = -EINVAL; + return ret; } - return ret; + return 0; } static int @@ -687,14 +686,18 @@ find_check_entry(struct ipt_entry *e, const char *name, unsigned int size, struct xt_target *target; int ret; unsigned int j; + struct xt_mtchk_param mtpar; ret = check_entry(e, name); if (ret) return ret; j = 0; - ret = IPT_MATCH_ITERATE(e, find_check_match, name, &e->ip, - e->comefrom, &j); + mtpar.table = name; + mtpar.entryinfo = &e->ip; + mtpar.hook_mask = e->comefrom; + mtpar.family = NFPROTO_IPV4; + ret = IPT_MATCH_ITERATE(e, find_check_match, &mtpar, &j); if (ret != 0) goto cleanup_matches; @@ -769,6 +772,7 @@ check_entry_size_and_hooks(struct ipt_entry *e, static int cleanup_entry(struct ipt_entry *e, unsigned int *i) { + struct xt_tgdtor_param par; struct ipt_entry_target *t; if (i && (*i)-- == 0) @@ -777,9 +781,13 @@ cleanup_entry(struct ipt_entry *e, unsigned int *i) /* Cleanup all matches */ IPT_MATCH_ITERATE(e, cleanup_match, NULL); t = ipt_get_target(e); - if (t->u.kernel.target->destroy) - t->u.kernel.target->destroy(t->u.kernel.target, t->data); - module_put(t->u.kernel.target->me); + + par.target = t->u.kernel.target; + par.targinfo = t->data; + par.family = NFPROTO_IPV4; + if (par.target->destroy != NULL) + par.target->destroy(&par); + module_put(par.target->me); return 0; } @@ -1648,12 +1656,16 @@ static int compat_check_entry(struct ipt_entry *e, const char *name, unsigned int *i) { + struct xt_mtchk_param mtpar; unsigned int j; int ret; j = 0; - ret = IPT_MATCH_ITERATE(e, check_match, name, &e->ip, - e->comefrom, &j); + mtpar.table = name; + mtpar.entryinfo = &e->ip; + mtpar.hook_mask = e->comefrom; + mtpar.family = NFPROTO_IPV4; + ret = IPT_MATCH_ITERATE(e, check_match, &mtpar, &j); if (ret) goto cleanup_matches; @@ -2121,30 +2133,23 @@ icmp_type_code_match(u_int8_t test_type, u_int8_t min_code, u_int8_t max_code, } static bool -icmp_match(const struct sk_buff *skb, - const struct net_device *in, - const struct net_device *out, - const struct xt_match *match, - const void *matchinfo, - int offset, - unsigned int protoff, - bool *hotdrop) +icmp_match(const struct sk_buff *skb, const struct xt_match_param *par) { const struct icmphdr *ic; struct icmphdr _icmph; - const struct ipt_icmp *icmpinfo = matchinfo; + const struct ipt_icmp *icmpinfo = par->matchinfo; /* Must not be a fragment. */ - if (offset) + if (par->fragoff != 0) return false; - ic = skb_header_pointer(skb, protoff, sizeof(_icmph), &_icmph); + ic = skb_header_pointer(skb, par->thoff, sizeof(_icmph), &_icmph); if (ic == NULL) { /* We've been asked to examine this packet, and we * can't. Hence, no choice but to drop. */ duprintf("Dropping evil ICMP tinygram.\n"); - *hotdrop = true; + *par->hotdrop = true; return false; } @@ -2155,15 +2160,9 @@ icmp_match(const struct sk_buff *skb, !!(icmpinfo->invflags&IPT_ICMP_INV)); } -/* Called when user tries to insert an entry of this type. */ -static bool -icmp_checkentry(const char *tablename, - const void *entry, - const struct xt_match *match, - void *matchinfo, - unsigned int hook_mask) +static bool icmp_checkentry(const struct xt_mtchk_param *par) { - const struct ipt_icmp *icmpinfo = matchinfo; + const struct ipt_icmp *icmpinfo = par->matchinfo; /* Must specify no unknown invflags */ return !(icmpinfo->invflags & ~IPT_ICMP_INV); diff --git a/net/ipv4/netfilter/ipt_CLUSTERIP.c b/net/ipv4/netfilter/ipt_CLUSTERIP.c index fafe8ebb4c55..7ac1677419a9 100644 --- a/net/ipv4/netfilter/ipt_CLUSTERIP.c +++ b/net/ipv4/netfilter/ipt_CLUSTERIP.c @@ -281,11 +281,9 @@ clusterip_responsible(const struct clusterip_config *config, u_int32_t hash) ***********************************************************************/ static unsigned int -clusterip_tg(struct sk_buff *skb, const struct net_device *in, - const struct net_device *out, unsigned int hooknum, - const struct xt_target *target, const void *targinfo) +clusterip_tg(struct sk_buff *skb, const struct xt_target_param *par) { - const struct ipt_clusterip_tgt_info *cipinfo = targinfo; + const struct ipt_clusterip_tgt_info *cipinfo = par->targinfo; struct nf_conn *ct; enum ip_conntrack_info ctinfo; u_int32_t hash; @@ -349,13 +347,10 @@ clusterip_tg(struct sk_buff *skb, const struct net_device *in, return XT_CONTINUE; } -static bool -clusterip_tg_check(const char *tablename, const void *e_void, - const struct xt_target *target, void *targinfo, - unsigned int hook_mask) +static bool clusterip_tg_check(const struct xt_tgchk_param *par) { - struct ipt_clusterip_tgt_info *cipinfo = targinfo; - const struct ipt_entry *e = e_void; + struct ipt_clusterip_tgt_info *cipinfo = par->targinfo; + const struct ipt_entry *e = par->entryinfo; struct clusterip_config *config; @@ -406,9 +401,9 @@ clusterip_tg_check(const char *tablename, const void *e_void, } cipinfo->config = config; - if (nf_ct_l3proto_try_module_get(target->family) < 0) { + if (nf_ct_l3proto_try_module_get(par->target->family) < 0) { printk(KERN_WARNING "can't load conntrack support for " - "proto=%u\n", target->family); + "proto=%u\n", par->target->family); return false; } @@ -416,9 +411,9 @@ clusterip_tg_check(const char *tablename, const void *e_void, } /* drop reference count of cluster config when rule is deleted */ -static void clusterip_tg_destroy(const struct xt_target *target, void *targinfo) +static void clusterip_tg_destroy(const struct xt_tgdtor_param *par) { - const struct ipt_clusterip_tgt_info *cipinfo = targinfo; + const struct ipt_clusterip_tgt_info *cipinfo = par->targinfo; /* if no more entries are referencing the config, remove it * from the list and destroy the proc entry */ @@ -426,7 +421,7 @@ static void clusterip_tg_destroy(const struct xt_target *target, void *targinfo) clusterip_config_put(cipinfo->config); - nf_ct_l3proto_module_put(target->family); + nf_ct_l3proto_module_put(par->target->family); } #ifdef CONFIG_COMPAT @@ -445,7 +440,7 @@ struct compat_ipt_clusterip_tgt_info static struct xt_target clusterip_tg_reg __read_mostly = { .name = "CLUSTERIP", - .family = AF_INET, + .family = NFPROTO_IPV4, .target = clusterip_tg, .checkentry = clusterip_tg_check, .destroy = clusterip_tg_destroy, @@ -546,7 +541,7 @@ arp_mangle(unsigned int hook, static struct nf_hook_ops cip_arp_ops __read_mostly = { .hook = arp_mangle, - .pf = NF_ARP, + .pf = NFPROTO_ARP, .hooknum = NF_ARP_OUT, .priority = -1 }; diff --git a/net/ipv4/netfilter/ipt_ECN.c b/net/ipv4/netfilter/ipt_ECN.c index d60139c134ca..f7e2fa0974dc 100644 --- a/net/ipv4/netfilter/ipt_ECN.c +++ b/net/ipv4/netfilter/ipt_ECN.c @@ -77,11 +77,9 @@ set_ect_tcp(struct sk_buff *skb, const struct ipt_ECN_info *einfo) } static unsigned int -ecn_tg(struct sk_buff *skb, const struct net_device *in, - const struct net_device *out, unsigned int hooknum, - const struct xt_target *target, const void *targinfo) +ecn_tg(struct sk_buff *skb, const struct xt_target_param *par) { - const struct ipt_ECN_info *einfo = targinfo; + const struct ipt_ECN_info *einfo = par->targinfo; if (einfo->operation & IPT_ECN_OP_SET_IP) if (!set_ect_ip(skb, einfo)) @@ -95,13 +93,10 @@ ecn_tg(struct sk_buff *skb, const struct net_device *in, return XT_CONTINUE; } -static bool -ecn_tg_check(const char *tablename, const void *e_void, - const struct xt_target *target, void *targinfo, - unsigned int hook_mask) +static bool ecn_tg_check(const struct xt_tgchk_param *par) { - const struct ipt_ECN_info *einfo = targinfo; - const struct ipt_entry *e = e_void; + const struct ipt_ECN_info *einfo = par->targinfo; + const struct ipt_entry *e = par->entryinfo; if (einfo->operation & IPT_ECN_OP_MASK) { printk(KERN_WARNING "ECN: unsupported ECN operation %x\n", @@ -124,7 +119,7 @@ ecn_tg_check(const char *tablename, const void *e_void, static struct xt_target ecn_tg_reg __read_mostly = { .name = "ECN", - .family = AF_INET, + .family = NFPROTO_IPV4, .target = ecn_tg, .targetsize = sizeof(struct ipt_ECN_info), .table = "mangle", diff --git a/net/ipv4/netfilter/ipt_LOG.c b/net/ipv4/netfilter/ipt_LOG.c index 0af14137137b..fc6ce04a3e35 100644 --- a/net/ipv4/netfilter/ipt_LOG.c +++ b/net/ipv4/netfilter/ipt_LOG.c @@ -375,7 +375,7 @@ static struct nf_loginfo default_loginfo = { }; static void -ipt_log_packet(unsigned int pf, +ipt_log_packet(u_int8_t pf, unsigned int hooknum, const struct sk_buff *skb, const struct net_device *in, @@ -426,28 +426,23 @@ ipt_log_packet(unsigned int pf, } static unsigned int -log_tg(struct sk_buff *skb, const struct net_device *in, - const struct net_device *out, unsigned int hooknum, - const struct xt_target *target, const void *targinfo) +log_tg(struct sk_buff *skb, const struct xt_target_param *par) { - const struct ipt_log_info *loginfo = targinfo; + const struct ipt_log_info *loginfo = par->targinfo; struct nf_loginfo li; li.type = NF_LOG_TYPE_LOG; li.u.log.level = loginfo->level; li.u.log.logflags = loginfo->logflags; - ipt_log_packet(PF_INET, hooknum, skb, in, out, &li, + ipt_log_packet(NFPROTO_IPV4, par->hooknum, skb, par->in, par->out, &li, loginfo->prefix); return XT_CONTINUE; } -static bool -log_tg_check(const char *tablename, const void *e, - const struct xt_target *target, void *targinfo, - unsigned int hook_mask) +static bool log_tg_check(const struct xt_tgchk_param *par) { - const struct ipt_log_info *loginfo = targinfo; + const struct ipt_log_info *loginfo = par->targinfo; if (loginfo->level >= 8) { pr_debug("LOG: level %u >= 8\n", loginfo->level); @@ -463,7 +458,7 @@ log_tg_check(const char *tablename, const void *e, static struct xt_target log_tg_reg __read_mostly = { .name = "LOG", - .family = AF_INET, + .family = NFPROTO_IPV4, .target = log_tg, .targetsize = sizeof(struct ipt_log_info), .checkentry = log_tg_check, @@ -483,7 +478,7 @@ static int __init log_tg_init(void) ret = xt_register_target(&log_tg_reg); if (ret < 0) return ret; - nf_log_register(PF_INET, &ipt_log_logger); + nf_log_register(NFPROTO_IPV4, &ipt_log_logger); return 0; } diff --git a/net/ipv4/netfilter/ipt_MASQUERADE.c b/net/ipv4/netfilter/ipt_MASQUERADE.c index 0841aefaa503..f389f60cb105 100644 --- a/net/ipv4/netfilter/ipt_MASQUERADE.c +++ b/net/ipv4/netfilter/ipt_MASQUERADE.c @@ -31,12 +31,9 @@ MODULE_DESCRIPTION("Xtables: automatic-address SNAT"); static DEFINE_RWLOCK(masq_lock); /* FIXME: Multiple targets. --RR */ -static bool -masquerade_tg_check(const char *tablename, const void *e, - const struct xt_target *target, void *targinfo, - unsigned int hook_mask) +static bool masquerade_tg_check(const struct xt_tgchk_param *par) { - const struct nf_nat_multi_range_compat *mr = targinfo; + const struct nf_nat_multi_range_compat *mr = par->targinfo; if (mr->range[0].flags & IP_NAT_RANGE_MAP_IPS) { pr_debug("masquerade_check: bad MAP_IPS.\n"); @@ -50,9 +47,7 @@ masquerade_tg_check(const char *tablename, const void *e, } static unsigned int -masquerade_tg(struct sk_buff *skb, const struct net_device *in, - const struct net_device *out, unsigned int hooknum, - const struct xt_target *target, const void *targinfo) +masquerade_tg(struct sk_buff *skb, const struct xt_target_param *par) { struct nf_conn *ct; struct nf_conn_nat *nat; @@ -62,7 +57,7 @@ masquerade_tg(struct sk_buff *skb, const struct net_device *in, const struct rtable *rt; __be32 newsrc; - NF_CT_ASSERT(hooknum == NF_INET_POST_ROUTING); + NF_CT_ASSERT(par->hooknum == NF_INET_POST_ROUTING); ct = nf_ct_get(skb, &ctinfo); nat = nfct_nat(ct); @@ -76,16 +71,16 @@ masquerade_tg(struct sk_buff *skb, const struct net_device *in, if (ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.u3.ip == 0) return NF_ACCEPT; - mr = targinfo; + mr = par->targinfo; rt = skb->rtable; - newsrc = inet_select_addr(out, rt->rt_gateway, RT_SCOPE_UNIVERSE); + newsrc = inet_select_addr(par->out, rt->rt_gateway, RT_SCOPE_UNIVERSE); if (!newsrc) { - printk("MASQUERADE: %s ate my IP address\n", out->name); + printk("MASQUERADE: %s ate my IP address\n", par->out->name); return NF_DROP; } write_lock_bh(&masq_lock); - nat->masq_index = out->ifindex; + nat->masq_index = par->out->ifindex; write_unlock_bh(&masq_lock); /* Transfer from original range. */ @@ -119,9 +114,7 @@ static int masq_device_event(struct notifier_block *this, void *ptr) { const struct net_device *dev = ptr; - - if (!net_eq(dev_net(dev), &init_net)) - return NOTIFY_DONE; + struct net *net = dev_net(dev); if (event == NETDEV_DOWN) { /* Device was downed. Search entire table for @@ -129,7 +122,8 @@ static int masq_device_event(struct notifier_block *this, and forget them. */ NF_CT_ASSERT(dev->ifindex != 0); - nf_ct_iterate_cleanup(device_cmp, (void *)(long)dev->ifindex); + nf_ct_iterate_cleanup(net, device_cmp, + (void *)(long)dev->ifindex); } return NOTIFY_DONE; @@ -153,7 +147,7 @@ static struct notifier_block masq_inet_notifier = { static struct xt_target masquerade_tg_reg __read_mostly = { .name = "MASQUERADE", - .family = AF_INET, + .family = NFPROTO_IPV4, .target = masquerade_tg, .targetsize = sizeof(struct nf_nat_multi_range_compat), .table = "nat", diff --git a/net/ipv4/netfilter/ipt_NETMAP.c b/net/ipv4/netfilter/ipt_NETMAP.c index 6739abfd1521..7c29582d4ec8 100644 --- a/net/ipv4/netfilter/ipt_NETMAP.c +++ b/net/ipv4/netfilter/ipt_NETMAP.c @@ -22,12 +22,9 @@ MODULE_LICENSE("GPL"); MODULE_AUTHOR("Svenning Soerensen <svenning@post5.tele.dk>"); MODULE_DESCRIPTION("Xtables: 1:1 NAT mapping of IPv4 subnets"); -static bool -netmap_tg_check(const char *tablename, const void *e, - const struct xt_target *target, void *targinfo, - unsigned int hook_mask) +static bool netmap_tg_check(const struct xt_tgchk_param *par) { - const struct nf_nat_multi_range_compat *mr = targinfo; + const struct nf_nat_multi_range_compat *mr = par->targinfo; if (!(mr->range[0].flags & IP_NAT_RANGE_MAP_IPS)) { pr_debug("NETMAP:check: bad MAP_IPS.\n"); @@ -41,24 +38,23 @@ netmap_tg_check(const char *tablename, const void *e, } static unsigned int -netmap_tg(struct sk_buff *skb, const struct net_device *in, - const struct net_device *out, unsigned int hooknum, - const struct xt_target *target, const void *targinfo) +netmap_tg(struct sk_buff *skb, const struct xt_target_param *par) { struct nf_conn *ct; enum ip_conntrack_info ctinfo; __be32 new_ip, netmask; - const struct nf_nat_multi_range_compat *mr = targinfo; + const struct nf_nat_multi_range_compat *mr = par->targinfo; struct nf_nat_range newrange; - NF_CT_ASSERT(hooknum == NF_INET_PRE_ROUTING - || hooknum == NF_INET_POST_ROUTING - || hooknum == NF_INET_LOCAL_OUT); + NF_CT_ASSERT(par->hooknum == NF_INET_PRE_ROUTING || + par->hooknum == NF_INET_POST_ROUTING || + par->hooknum == NF_INET_LOCAL_OUT); ct = nf_ct_get(skb, &ctinfo); netmask = ~(mr->range[0].min_ip ^ mr->range[0].max_ip); - if (hooknum == NF_INET_PRE_ROUTING || hooknum == NF_INET_LOCAL_OUT) + if (par->hooknum == NF_INET_PRE_ROUTING || + par->hooknum == NF_INET_LOCAL_OUT) new_ip = ip_hdr(skb)->daddr & ~netmask; else new_ip = ip_hdr(skb)->saddr & ~netmask; @@ -70,12 +66,12 @@ netmap_tg(struct sk_buff *skb, const struct net_device *in, mr->range[0].min, mr->range[0].max }); /* Hand modified range to generic setup. */ - return nf_nat_setup_info(ct, &newrange, HOOK2MANIP(hooknum)); + return nf_nat_setup_info(ct, &newrange, HOOK2MANIP(par->hooknum)); } static struct xt_target netmap_tg_reg __read_mostly = { .name = "NETMAP", - .family = AF_INET, + .family = NFPROTO_IPV4, .target = netmap_tg, .targetsize = sizeof(struct nf_nat_multi_range_compat), .table = "nat", diff --git a/net/ipv4/netfilter/ipt_REDIRECT.c b/net/ipv4/netfilter/ipt_REDIRECT.c index 5c6292449d13..698e5e78685b 100644 --- a/net/ipv4/netfilter/ipt_REDIRECT.c +++ b/net/ipv4/netfilter/ipt_REDIRECT.c @@ -26,12 +26,9 @@ MODULE_AUTHOR("Netfilter Core Team <coreteam@netfilter.org>"); MODULE_DESCRIPTION("Xtables: Connection redirection to localhost"); /* FIXME: Take multiple ranges --RR */ -static bool -redirect_tg_check(const char *tablename, const void *e, - const struct xt_target *target, void *targinfo, - unsigned int hook_mask) +static bool redirect_tg_check(const struct xt_tgchk_param *par) { - const struct nf_nat_multi_range_compat *mr = targinfo; + const struct nf_nat_multi_range_compat *mr = par->targinfo; if (mr->range[0].flags & IP_NAT_RANGE_MAP_IPS) { pr_debug("redirect_check: bad MAP_IPS.\n"); @@ -45,24 +42,22 @@ redirect_tg_check(const char *tablename, const void *e, } static unsigned int -redirect_tg(struct sk_buff *skb, const struct net_device *in, - const struct net_device *out, unsigned int hooknum, - const struct xt_target *target, const void *targinfo) +redirect_tg(struct sk_buff *skb, const struct xt_target_param *par) { struct nf_conn *ct; enum ip_conntrack_info ctinfo; __be32 newdst; - const struct nf_nat_multi_range_compat *mr = targinfo; + const struct nf_nat_multi_range_compat *mr = par->targinfo; struct nf_nat_range newrange; - NF_CT_ASSERT(hooknum == NF_INET_PRE_ROUTING - || hooknum == NF_INET_LOCAL_OUT); + NF_CT_ASSERT(par->hooknum == NF_INET_PRE_ROUTING || + par->hooknum == NF_INET_LOCAL_OUT); ct = nf_ct_get(skb, &ctinfo); NF_CT_ASSERT(ct && (ctinfo == IP_CT_NEW || ctinfo == IP_CT_RELATED)); /* Local packets: make them go to loopback */ - if (hooknum == NF_INET_LOCAL_OUT) + if (par->hooknum == NF_INET_LOCAL_OUT) newdst = htonl(0x7F000001); else { struct in_device *indev; @@ -92,7 +87,7 @@ redirect_tg(struct sk_buff *skb, const struct net_device *in, static struct xt_target redirect_tg_reg __read_mostly = { .name = "REDIRECT", - .family = AF_INET, + .family = NFPROTO_IPV4, .target = redirect_tg, .targetsize = sizeof(struct nf_nat_multi_range_compat), .table = "nat", diff --git a/net/ipv4/netfilter/ipt_REJECT.c b/net/ipv4/netfilter/ipt_REJECT.c index 2639872849da..0b4b6e0ff2b9 100644 --- a/net/ipv4/netfilter/ipt_REJECT.c +++ b/net/ipv4/netfilter/ipt_REJECT.c @@ -136,11 +136,9 @@ static inline void send_unreach(struct sk_buff *skb_in, int code) } static unsigned int -reject_tg(struct sk_buff *skb, const struct net_device *in, - const struct net_device *out, unsigned int hooknum, - const struct xt_target *target, const void *targinfo) +reject_tg(struct sk_buff *skb, const struct xt_target_param *par) { - const struct ipt_reject_info *reject = targinfo; + const struct ipt_reject_info *reject = par->targinfo; /* WARNING: This code causes reentry within iptables. This means that the iptables jump stack is now crap. We @@ -168,7 +166,7 @@ reject_tg(struct sk_buff *skb, const struct net_device *in, send_unreach(skb, ICMP_PKT_FILTERED); break; case IPT_TCP_RESET: - send_reset(skb, hooknum); + send_reset(skb, par->hooknum); case IPT_ICMP_ECHOREPLY: /* Doesn't happen. */ break; @@ -177,13 +175,10 @@ reject_tg(struct sk_buff *skb, const struct net_device *in, return NF_DROP; } -static bool -reject_tg_check(const char *tablename, const void *e_void, - const struct xt_target *target, void *targinfo, - unsigned int hook_mask) +static bool reject_tg_check(const struct xt_tgchk_param *par) { - const struct ipt_reject_info *rejinfo = targinfo; - const struct ipt_entry *e = e_void; + const struct ipt_reject_info *rejinfo = par->targinfo; + const struct ipt_entry *e = par->entryinfo; if (rejinfo->with == IPT_ICMP_ECHOREPLY) { printk("ipt_REJECT: ECHOREPLY no longer supported.\n"); @@ -201,7 +196,7 @@ reject_tg_check(const char *tablename, const void *e_void, static struct xt_target reject_tg_reg __read_mostly = { .name = "REJECT", - .family = AF_INET, + .family = NFPROTO_IPV4, .target = reject_tg, .targetsize = sizeof(struct ipt_reject_info), .table = "filter", diff --git a/net/ipv4/netfilter/ipt_TTL.c b/net/ipv4/netfilter/ipt_TTL.c index 30eed65e7338..6d76aae90cc0 100644 --- a/net/ipv4/netfilter/ipt_TTL.c +++ b/net/ipv4/netfilter/ipt_TTL.c @@ -20,12 +20,10 @@ MODULE_DESCRIPTION("Xtables: IPv4 TTL field modification target"); MODULE_LICENSE("GPL"); static unsigned int -ttl_tg(struct sk_buff *skb, const struct net_device *in, - const struct net_device *out, unsigned int hooknum, - const struct xt_target *target, const void *targinfo) +ttl_tg(struct sk_buff *skb, const struct xt_target_param *par) { struct iphdr *iph; - const struct ipt_TTL_info *info = targinfo; + const struct ipt_TTL_info *info = par->targinfo; int new_ttl; if (!skb_make_writable(skb, skb->len)) @@ -61,12 +59,9 @@ ttl_tg(struct sk_buff *skb, const struct net_device *in, return XT_CONTINUE; } -static bool -ttl_tg_check(const char *tablename, const void *e, - const struct xt_target *target, void *targinfo, - unsigned int hook_mask) +static bool ttl_tg_check(const struct xt_tgchk_param *par) { - const struct ipt_TTL_info *info = targinfo; + const struct ipt_TTL_info *info = par->targinfo; if (info->mode > IPT_TTL_MAXMODE) { printk(KERN_WARNING "ipt_TTL: invalid or unknown Mode %u\n", @@ -80,7 +75,7 @@ ttl_tg_check(const char *tablename, const void *e, static struct xt_target ttl_tg_reg __read_mostly = { .name = "TTL", - .family = AF_INET, + .family = NFPROTO_IPV4, .target = ttl_tg, .targetsize = sizeof(struct ipt_TTL_info), .table = "mangle", diff --git a/net/ipv4/netfilter/ipt_ULOG.c b/net/ipv4/netfilter/ipt_ULOG.c index b192756c6d0d..18a2826b57c6 100644 --- a/net/ipv4/netfilter/ipt_ULOG.c +++ b/net/ipv4/netfilter/ipt_ULOG.c @@ -281,18 +281,14 @@ alloc_failure: } static unsigned int -ulog_tg(struct sk_buff *skb, const struct net_device *in, - const struct net_device *out, unsigned int hooknum, - const struct xt_target *target, const void *targinfo) +ulog_tg(struct sk_buff *skb, const struct xt_target_param *par) { - struct ipt_ulog_info *loginfo = (struct ipt_ulog_info *) targinfo; - - ipt_ulog_packet(hooknum, skb, in, out, loginfo, NULL); - + ipt_ulog_packet(par->hooknum, skb, par->in, par->out, + par->targinfo, NULL); return XT_CONTINUE; } -static void ipt_logfn(unsigned int pf, +static void ipt_logfn(u_int8_t pf, unsigned int hooknum, const struct sk_buff *skb, const struct net_device *in, @@ -317,12 +313,9 @@ static void ipt_logfn(unsigned int pf, ipt_ulog_packet(hooknum, skb, in, out, &loginfo, prefix); } -static bool -ulog_tg_check(const char *tablename, const void *e, - const struct xt_target *target, void *targinfo, - unsigned int hookmask) +static bool ulog_tg_check(const struct xt_tgchk_param *par) { - const struct ipt_ulog_info *loginfo = targinfo; + const struct ipt_ulog_info *loginfo = par->targinfo; if (loginfo->prefix[sizeof(loginfo->prefix) - 1] != '\0') { pr_debug("ipt_ULOG: prefix term %i\n", @@ -374,7 +367,7 @@ static int ulog_tg_compat_to_user(void __user *dst, void *src) static struct xt_target ulog_tg_reg __read_mostly = { .name = "ULOG", - .family = AF_INET, + .family = NFPROTO_IPV4, .target = ulog_tg, .targetsize = sizeof(struct ipt_ulog_info), .checkentry = ulog_tg_check, @@ -419,7 +412,7 @@ static int __init ulog_tg_init(void) return ret; } if (nflog) - nf_log_register(PF_INET, &ipt_ulog_logger); + nf_log_register(NFPROTO_IPV4, &ipt_ulog_logger); return 0; } diff --git a/net/ipv4/netfilter/ipt_addrtype.c b/net/ipv4/netfilter/ipt_addrtype.c index 462a22c97877..88762f02779d 100644 --- a/net/ipv4/netfilter/ipt_addrtype.c +++ b/net/ipv4/netfilter/ipt_addrtype.c @@ -30,12 +30,9 @@ static inline bool match_type(const struct net_device *dev, __be32 addr, } static bool -addrtype_mt_v0(const struct sk_buff *skb, const struct net_device *in, - const struct net_device *out, const struct xt_match *match, - const void *matchinfo, int offset, unsigned int protoff, - bool *hotdrop) +addrtype_mt_v0(const struct sk_buff *skb, const struct xt_match_param *par) { - const struct ipt_addrtype_info *info = matchinfo; + const struct ipt_addrtype_info *info = par->matchinfo; const struct iphdr *iph = ip_hdr(skb); bool ret = true; @@ -50,20 +47,17 @@ addrtype_mt_v0(const struct sk_buff *skb, const struct net_device *in, } static bool -addrtype_mt_v1(const struct sk_buff *skb, const struct net_device *in, - const struct net_device *out, const struct xt_match *match, - const void *matchinfo, int offset, unsigned int protoff, - bool *hotdrop) +addrtype_mt_v1(const struct sk_buff *skb, const struct xt_match_param *par) { - const struct ipt_addrtype_info_v1 *info = matchinfo; + const struct ipt_addrtype_info_v1 *info = par->matchinfo; const struct iphdr *iph = ip_hdr(skb); const struct net_device *dev = NULL; bool ret = true; if (info->flags & IPT_ADDRTYPE_LIMIT_IFACE_IN) - dev = in; + dev = par->in; else if (info->flags & IPT_ADDRTYPE_LIMIT_IFACE_OUT) - dev = out; + dev = par->out; if (info->source) ret &= match_type(dev, iph->saddr, info->source) ^ @@ -74,12 +68,9 @@ addrtype_mt_v1(const struct sk_buff *skb, const struct net_device *in, return ret; } -static bool -addrtype_mt_checkentry_v1(const char *tablename, const void *ip_void, - const struct xt_match *match, void *matchinfo, - unsigned int hook_mask) +static bool addrtype_mt_checkentry_v1(const struct xt_mtchk_param *par) { - struct ipt_addrtype_info_v1 *info = matchinfo; + struct ipt_addrtype_info_v1 *info = par->matchinfo; if (info->flags & IPT_ADDRTYPE_LIMIT_IFACE_IN && info->flags & IPT_ADDRTYPE_LIMIT_IFACE_OUT) { @@ -88,14 +79,16 @@ addrtype_mt_checkentry_v1(const char *tablename, const void *ip_void, return false; } - if (hook_mask & (1 << NF_INET_PRE_ROUTING | 1 << NF_INET_LOCAL_IN) && + if (par->hook_mask & ((1 << NF_INET_PRE_ROUTING) | + (1 << NF_INET_LOCAL_IN)) && info->flags & IPT_ADDRTYPE_LIMIT_IFACE_OUT) { printk(KERN_ERR "ipt_addrtype: output interface limitation " "not valid in PRE_ROUTING and INPUT\n"); return false; } - if (hook_mask & (1 << NF_INET_POST_ROUTING | 1 << NF_INET_LOCAL_OUT) && + if (par->hook_mask & ((1 << NF_INET_POST_ROUTING) | + (1 << NF_INET_LOCAL_OUT)) && info->flags & IPT_ADDRTYPE_LIMIT_IFACE_IN) { printk(KERN_ERR "ipt_addrtype: input interface limitation " "not valid in POST_ROUTING and OUTPUT\n"); @@ -108,14 +101,14 @@ addrtype_mt_checkentry_v1(const char *tablename, const void *ip_void, static struct xt_match addrtype_mt_reg[] __read_mostly = { { .name = "addrtype", - .family = AF_INET, + .family = NFPROTO_IPV4, .match = addrtype_mt_v0, .matchsize = sizeof(struct ipt_addrtype_info), .me = THIS_MODULE }, { .name = "addrtype", - .family = AF_INET, + .family = NFPROTO_IPV4, .revision = 1, .match = addrtype_mt_v1, .checkentry = addrtype_mt_checkentry_v1, diff --git a/net/ipv4/netfilter/ipt_ah.c b/net/ipv4/netfilter/ipt_ah.c index e977989629c7..0104c0b399de 100644 --- a/net/ipv4/netfilter/ipt_ah.c +++ b/net/ipv4/netfilter/ipt_ah.c @@ -36,27 +36,23 @@ spi_match(u_int32_t min, u_int32_t max, u_int32_t spi, bool invert) return r; } -static bool -ah_mt(const struct sk_buff *skb, const struct net_device *in, - const struct net_device *out, const struct xt_match *match, - const void *matchinfo, int offset, unsigned int protoff, bool *hotdrop) +static bool ah_mt(const struct sk_buff *skb, const struct xt_match_param *par) { struct ip_auth_hdr _ahdr; const struct ip_auth_hdr *ah; - const struct ipt_ah *ahinfo = matchinfo; + const struct ipt_ah *ahinfo = par->matchinfo; /* Must not be a fragment. */ - if (offset) + if (par->fragoff != 0) return false; - ah = skb_header_pointer(skb, protoff, - sizeof(_ahdr), &_ahdr); + ah = skb_header_pointer(skb, par->thoff, sizeof(_ahdr), &_ahdr); if (ah == NULL) { /* We've been asked to examine this packet, and we * can't. Hence, no choice but to drop. */ duprintf("Dropping evil AH tinygram.\n"); - *hotdrop = true; + *par->hotdrop = true; return 0; } @@ -65,13 +61,9 @@ ah_mt(const struct sk_buff *skb, const struct net_device *in, !!(ahinfo->invflags & IPT_AH_INV_SPI)); } -/* Called when user tries to insert an entry of this type. */ -static bool -ah_mt_check(const char *tablename, const void *ip_void, - const struct xt_match *match, void *matchinfo, - unsigned int hook_mask) +static bool ah_mt_check(const struct xt_mtchk_param *par) { - const struct ipt_ah *ahinfo = matchinfo; + const struct ipt_ah *ahinfo = par->matchinfo; /* Must specify no unknown invflags */ if (ahinfo->invflags & ~IPT_AH_INV_MASK) { @@ -83,7 +75,7 @@ ah_mt_check(const char *tablename, const void *ip_void, static struct xt_match ah_mt_reg __read_mostly = { .name = "ah", - .family = AF_INET, + .family = NFPROTO_IPV4, .match = ah_mt, .matchsize = sizeof(struct ipt_ah), .proto = IPPROTO_AH, diff --git a/net/ipv4/netfilter/ipt_ecn.c b/net/ipv4/netfilter/ipt_ecn.c index 749de8284ce5..6289b64144c6 100644 --- a/net/ipv4/netfilter/ipt_ecn.c +++ b/net/ipv4/netfilter/ipt_ecn.c @@ -67,12 +67,9 @@ static inline bool match_tcp(const struct sk_buff *skb, return true; } -static bool -ecn_mt(const struct sk_buff *skb, const struct net_device *in, - const struct net_device *out, const struct xt_match *match, - const void *matchinfo, int offset, unsigned int protoff, bool *hotdrop) +static bool ecn_mt(const struct sk_buff *skb, const struct xt_match_param *par) { - const struct ipt_ecn_info *info = matchinfo; + const struct ipt_ecn_info *info = par->matchinfo; if (info->operation & IPT_ECN_OP_MATCH_IP) if (!match_ip(skb, info)) @@ -81,20 +78,17 @@ ecn_mt(const struct sk_buff *skb, const struct net_device *in, if (info->operation & (IPT_ECN_OP_MATCH_ECE|IPT_ECN_OP_MATCH_CWR)) { if (ip_hdr(skb)->protocol != IPPROTO_TCP) return false; - if (!match_tcp(skb, info, hotdrop)) + if (!match_tcp(skb, info, par->hotdrop)) return false; } return true; } -static bool -ecn_mt_check(const char *tablename, const void *ip_void, - const struct xt_match *match, void *matchinfo, - unsigned int hook_mask) +static bool ecn_mt_check(const struct xt_mtchk_param *par) { - const struct ipt_ecn_info *info = matchinfo; - const struct ipt_ip *ip = ip_void; + const struct ipt_ecn_info *info = par->matchinfo; + const struct ipt_ip *ip = par->entryinfo; if (info->operation & IPT_ECN_OP_MATCH_MASK) return false; @@ -114,7 +108,7 @@ ecn_mt_check(const char *tablename, const void *ip_void, static struct xt_match ecn_mt_reg __read_mostly = { .name = "ecn", - .family = AF_INET, + .family = NFPROTO_IPV4, .match = ecn_mt, .matchsize = sizeof(struct ipt_ecn_info), .checkentry = ecn_mt_check, diff --git a/net/ipv4/netfilter/ipt_ttl.c b/net/ipv4/netfilter/ipt_ttl.c index e0b8caeb710c..297f1cbf4ff5 100644 --- a/net/ipv4/netfilter/ipt_ttl.c +++ b/net/ipv4/netfilter/ipt_ttl.c @@ -18,12 +18,9 @@ MODULE_AUTHOR("Harald Welte <laforge@netfilter.org>"); MODULE_DESCRIPTION("Xtables: IPv4 TTL field match"); MODULE_LICENSE("GPL"); -static bool -ttl_mt(const struct sk_buff *skb, const struct net_device *in, - const struct net_device *out, const struct xt_match *match, - const void *matchinfo, int offset, unsigned int protoff, bool *hotdrop) +static bool ttl_mt(const struct sk_buff *skb, const struct xt_match_param *par) { - const struct ipt_ttl_info *info = matchinfo; + const struct ipt_ttl_info *info = par->matchinfo; const u8 ttl = ip_hdr(skb)->ttl; switch (info->mode) { @@ -46,7 +43,7 @@ ttl_mt(const struct sk_buff *skb, const struct net_device *in, static struct xt_match ttl_mt_reg __read_mostly = { .name = "ttl", - .family = AF_INET, + .family = NFPROTO_IPV4, .match = ttl_mt, .matchsize = sizeof(struct ipt_ttl_info), .me = THIS_MODULE, diff --git a/net/ipv4/netfilter/iptable_filter.c b/net/ipv4/netfilter/iptable_filter.c index 1ea677dcf845..c9224310ebae 100644 --- a/net/ipv4/netfilter/iptable_filter.c +++ b/net/ipv4/netfilter/iptable_filter.c @@ -70,7 +70,7 @@ ipt_local_in_hook(unsigned int hook, int (*okfn)(struct sk_buff *)) { return ipt_do_table(skb, hook, in, out, - nf_local_in_net(in, out)->ipv4.iptable_filter); + dev_net(in)->ipv4.iptable_filter); } static unsigned int @@ -81,7 +81,7 @@ ipt_hook(unsigned int hook, int (*okfn)(struct sk_buff *)) { return ipt_do_table(skb, hook, in, out, - nf_forward_net(in, out)->ipv4.iptable_filter); + dev_net(in)->ipv4.iptable_filter); } static unsigned int @@ -101,7 +101,7 @@ ipt_local_out_hook(unsigned int hook, } return ipt_do_table(skb, hook, in, out, - nf_local_out_net(in, out)->ipv4.iptable_filter); + dev_net(out)->ipv4.iptable_filter); } static struct nf_hook_ops ipt_ops[] __read_mostly = { diff --git a/net/ipv4/netfilter/iptable_mangle.c b/net/ipv4/netfilter/iptable_mangle.c index da59182f2226..69f2c4287146 100644 --- a/net/ipv4/netfilter/iptable_mangle.c +++ b/net/ipv4/netfilter/iptable_mangle.c @@ -81,7 +81,7 @@ ipt_pre_routing_hook(unsigned int hook, int (*okfn)(struct sk_buff *)) { return ipt_do_table(skb, hook, in, out, - nf_pre_routing_net(in, out)->ipv4.iptable_mangle); + dev_net(in)->ipv4.iptable_mangle); } static unsigned int @@ -92,7 +92,7 @@ ipt_post_routing_hook(unsigned int hook, int (*okfn)(struct sk_buff *)) { return ipt_do_table(skb, hook, in, out, - nf_post_routing_net(in, out)->ipv4.iptable_mangle); + dev_net(out)->ipv4.iptable_mangle); } static unsigned int @@ -103,7 +103,7 @@ ipt_local_in_hook(unsigned int hook, int (*okfn)(struct sk_buff *)) { return ipt_do_table(skb, hook, in, out, - nf_local_in_net(in, out)->ipv4.iptable_mangle); + dev_net(in)->ipv4.iptable_mangle); } static unsigned int @@ -114,7 +114,7 @@ ipt_forward_hook(unsigned int hook, int (*okfn)(struct sk_buff *)) { return ipt_do_table(skb, hook, in, out, - nf_forward_net(in, out)->ipv4.iptable_mangle); + dev_net(in)->ipv4.iptable_mangle); } static unsigned int @@ -147,7 +147,7 @@ ipt_local_hook(unsigned int hook, tos = iph->tos; ret = ipt_do_table(skb, hook, in, out, - nf_local_out_net(in, out)->ipv4.iptable_mangle); + dev_net(out)->ipv4.iptable_mangle); /* Reroute for ANY change. */ if (ret != NF_DROP && ret != NF_STOLEN && ret != NF_QUEUE) { iph = ip_hdr(skb); diff --git a/net/ipv4/netfilter/iptable_raw.c b/net/ipv4/netfilter/iptable_raw.c index fddce7754b72..8faebfe638f1 100644 --- a/net/ipv4/netfilter/iptable_raw.c +++ b/net/ipv4/netfilter/iptable_raw.c @@ -53,7 +53,7 @@ ipt_hook(unsigned int hook, int (*okfn)(struct sk_buff *)) { return ipt_do_table(skb, hook, in, out, - nf_pre_routing_net(in, out)->ipv4.iptable_raw); + dev_net(in)->ipv4.iptable_raw); } static unsigned int @@ -72,7 +72,7 @@ ipt_local_hook(unsigned int hook, return NF_ACCEPT; } return ipt_do_table(skb, hook, in, out, - nf_local_out_net(in, out)->ipv4.iptable_raw); + dev_net(out)->ipv4.iptable_raw); } /* 'raw' is the very first table. */ diff --git a/net/ipv4/netfilter/iptable_security.c b/net/ipv4/netfilter/iptable_security.c index db6d312128e1..36f3be3cc428 100644 --- a/net/ipv4/netfilter/iptable_security.c +++ b/net/ipv4/netfilter/iptable_security.c @@ -73,7 +73,7 @@ ipt_local_in_hook(unsigned int hook, int (*okfn)(struct sk_buff *)) { return ipt_do_table(skb, hook, in, out, - nf_local_in_net(in, out)->ipv4.iptable_security); + dev_net(in)->ipv4.iptable_security); } static unsigned int @@ -84,7 +84,7 @@ ipt_forward_hook(unsigned int hook, int (*okfn)(struct sk_buff *)) { return ipt_do_table(skb, hook, in, out, - nf_forward_net(in, out)->ipv4.iptable_security); + dev_net(in)->ipv4.iptable_security); } static unsigned int @@ -103,7 +103,7 @@ ipt_local_out_hook(unsigned int hook, return NF_ACCEPT; } return ipt_do_table(skb, hook, in, out, - nf_local_out_net(in, out)->ipv4.iptable_security); + dev_net(out)->ipv4.iptable_security); } static struct nf_hook_ops ipt_ops[] __read_mostly = { diff --git a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c index 5a955c440364..4a7c35275396 100644 --- a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c +++ b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c @@ -1,3 +1,4 @@ + /* (C) 1999-2001 Paul `Rusty' Russell * (C) 2002-2004 Netfilter Core Team <coreteam@netfilter.org> * @@ -24,6 +25,7 @@ #include <net/netfilter/nf_conntrack_core.h> #include <net/netfilter/ipv4/nf_conntrack_ipv4.h> #include <net/netfilter/nf_nat_helper.h> +#include <net/netfilter/ipv4/nf_defrag_ipv4.h> int (*nf_nat_seq_adjust_hook)(struct sk_buff *skb, struct nf_conn *ct, @@ -63,23 +65,6 @@ static int ipv4_print_tuple(struct seq_file *s, NIPQUAD(tuple->dst.u3.ip)); } -/* Returns new sk_buff, or NULL */ -static int nf_ct_ipv4_gather_frags(struct sk_buff *skb, u_int32_t user) -{ - int err; - - skb_orphan(skb); - - local_bh_disable(); - err = ip_defrag(skb, user); - local_bh_enable(); - - if (!err) - ip_send_check(ip_hdr(skb)); - - return err; -} - static int ipv4_get_l4proto(const struct sk_buff *skb, unsigned int nhoff, unsigned int *dataoff, u_int8_t *protonum) { @@ -144,35 +129,13 @@ out: return nf_conntrack_confirm(skb); } -static unsigned int ipv4_conntrack_defrag(unsigned int hooknum, - struct sk_buff *skb, - const struct net_device *in, - const struct net_device *out, - int (*okfn)(struct sk_buff *)) -{ - /* Previously seen (loopback)? Ignore. Do this before - fragment check. */ - if (skb->nfct) - return NF_ACCEPT; - - /* Gather fragments. */ - if (ip_hdr(skb)->frag_off & htons(IP_MF | IP_OFFSET)) { - if (nf_ct_ipv4_gather_frags(skb, - hooknum == NF_INET_PRE_ROUTING ? - IP_DEFRAG_CONNTRACK_IN : - IP_DEFRAG_CONNTRACK_OUT)) - return NF_STOLEN; - } - return NF_ACCEPT; -} - static unsigned int ipv4_conntrack_in(unsigned int hooknum, struct sk_buff *skb, const struct net_device *in, const struct net_device *out, int (*okfn)(struct sk_buff *)) { - return nf_conntrack_in(PF_INET, hooknum, skb); + return nf_conntrack_in(dev_net(in), PF_INET, hooknum, skb); } static unsigned int ipv4_conntrack_local(unsigned int hooknum, @@ -188,20 +151,13 @@ static unsigned int ipv4_conntrack_local(unsigned int hooknum, printk("ipt_hook: happy cracking.\n"); return NF_ACCEPT; } - return nf_conntrack_in(PF_INET, hooknum, skb); + return nf_conntrack_in(dev_net(out), PF_INET, hooknum, skb); } /* Connection tracking may drop packets, but never alters them, so make it the first hook. */ static struct nf_hook_ops ipv4_conntrack_ops[] __read_mostly = { { - .hook = ipv4_conntrack_defrag, - .owner = THIS_MODULE, - .pf = PF_INET, - .hooknum = NF_INET_PRE_ROUTING, - .priority = NF_IP_PRI_CONNTRACK_DEFRAG, - }, - { .hook = ipv4_conntrack_in, .owner = THIS_MODULE, .pf = PF_INET, @@ -209,13 +165,6 @@ static struct nf_hook_ops ipv4_conntrack_ops[] __read_mostly = { .priority = NF_IP_PRI_CONNTRACK, }, { - .hook = ipv4_conntrack_defrag, - .owner = THIS_MODULE, - .pf = PF_INET, - .hooknum = NF_INET_LOCAL_OUT, - .priority = NF_IP_PRI_CONNTRACK_DEFRAG, - }, - { .hook = ipv4_conntrack_local, .owner = THIS_MODULE, .pf = PF_INET, @@ -254,7 +203,7 @@ static ctl_table ip_ct_sysctl_table[] = { { .ctl_name = NET_IPV4_NF_CONNTRACK_COUNT, .procname = "ip_conntrack_count", - .data = &nf_conntrack_count, + .data = &init_net.ct.count, .maxlen = sizeof(int), .mode = 0444, .proc_handler = &proc_dointvec, @@ -270,7 +219,7 @@ static ctl_table ip_ct_sysctl_table[] = { { .ctl_name = NET_IPV4_NF_CONNTRACK_CHECKSUM, .procname = "ip_conntrack_checksum", - .data = &nf_conntrack_checksum, + .data = &init_net.ct.sysctl_checksum, .maxlen = sizeof(int), .mode = 0644, .proc_handler = &proc_dointvec, @@ -278,7 +227,7 @@ static ctl_table ip_ct_sysctl_table[] = { { .ctl_name = NET_IPV4_NF_CONNTRACK_LOG_INVALID, .procname = "ip_conntrack_log_invalid", - .data = &nf_ct_log_invalid, + .data = &init_net.ct.sysctl_log_invalid, .maxlen = sizeof(unsigned int), .mode = 0644, .proc_handler = &proc_dointvec_minmax, @@ -323,7 +272,7 @@ getorigdst(struct sock *sk, int optval, void __user *user, int *len) return -EINVAL; } - h = nf_conntrack_find_get(&tuple); + h = nf_conntrack_find_get(sock_net(sk), &tuple); if (h) { struct sockaddr_in sin; struct nf_conn *ct = nf_ct_tuplehash_to_ctrack(h); @@ -422,6 +371,7 @@ static int __init nf_conntrack_l3proto_ipv4_init(void) int ret = 0; need_conntrack(); + nf_defrag_ipv4_enable(); ret = nf_register_sockopt(&so_getorigdst); if (ret < 0) { diff --git a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4_compat.c b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4_compat.c index 3a020720e40b..313ebf00ee36 100644 --- a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4_compat.c +++ b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4_compat.c @@ -21,18 +21,20 @@ #include <net/netfilter/nf_conntrack_acct.h> struct ct_iter_state { + struct seq_net_private p; unsigned int bucket; }; static struct hlist_node *ct_get_first(struct seq_file *seq) { + struct net *net = seq_file_net(seq); struct ct_iter_state *st = seq->private; struct hlist_node *n; for (st->bucket = 0; st->bucket < nf_conntrack_htable_size; st->bucket++) { - n = rcu_dereference(nf_conntrack_hash[st->bucket].first); + n = rcu_dereference(net->ct.hash[st->bucket].first); if (n) return n; } @@ -42,13 +44,14 @@ static struct hlist_node *ct_get_first(struct seq_file *seq) static struct hlist_node *ct_get_next(struct seq_file *seq, struct hlist_node *head) { + struct net *net = seq_file_net(seq); struct ct_iter_state *st = seq->private; head = rcu_dereference(head->next); while (head == NULL) { if (++st->bucket >= nf_conntrack_htable_size) return NULL; - head = rcu_dereference(nf_conntrack_hash[st->bucket].first); + head = rcu_dereference(net->ct.hash[st->bucket].first); } return head; } @@ -158,8 +161,8 @@ static const struct seq_operations ct_seq_ops = { static int ct_open(struct inode *inode, struct file *file) { - return seq_open_private(file, &ct_seq_ops, - sizeof(struct ct_iter_state)); + return seq_open_net(inode, file, &ct_seq_ops, + sizeof(struct ct_iter_state)); } static const struct file_operations ct_file_ops = { @@ -167,21 +170,23 @@ static const struct file_operations ct_file_ops = { .open = ct_open, .read = seq_read, .llseek = seq_lseek, - .release = seq_release_private, + .release = seq_release_net, }; /* expects */ struct ct_expect_iter_state { + struct seq_net_private p; unsigned int bucket; }; static struct hlist_node *ct_expect_get_first(struct seq_file *seq) { + struct net *net = seq_file_net(seq); struct ct_expect_iter_state *st = seq->private; struct hlist_node *n; for (st->bucket = 0; st->bucket < nf_ct_expect_hsize; st->bucket++) { - n = rcu_dereference(nf_ct_expect_hash[st->bucket].first); + n = rcu_dereference(net->ct.expect_hash[st->bucket].first); if (n) return n; } @@ -191,13 +196,14 @@ static struct hlist_node *ct_expect_get_first(struct seq_file *seq) static struct hlist_node *ct_expect_get_next(struct seq_file *seq, struct hlist_node *head) { + struct net *net = seq_file_net(seq); struct ct_expect_iter_state *st = seq->private; head = rcu_dereference(head->next); while (head == NULL) { if (++st->bucket >= nf_ct_expect_hsize) return NULL; - head = rcu_dereference(nf_ct_expect_hash[st->bucket].first); + head = rcu_dereference(net->ct.expect_hash[st->bucket].first); } return head; } @@ -265,8 +271,8 @@ static const struct seq_operations exp_seq_ops = { static int exp_open(struct inode *inode, struct file *file) { - return seq_open_private(file, &exp_seq_ops, - sizeof(struct ct_expect_iter_state)); + return seq_open_net(inode, file, &exp_seq_ops, + sizeof(struct ct_expect_iter_state)); } static const struct file_operations ip_exp_file_ops = { @@ -274,11 +280,12 @@ static const struct file_operations ip_exp_file_ops = { .open = exp_open, .read = seq_read, .llseek = seq_lseek, - .release = seq_release_private, + .release = seq_release_net, }; static void *ct_cpu_seq_start(struct seq_file *seq, loff_t *pos) { + struct net *net = seq_file_net(seq); int cpu; if (*pos == 0) @@ -288,7 +295,7 @@ static void *ct_cpu_seq_start(struct seq_file *seq, loff_t *pos) if (!cpu_possible(cpu)) continue; *pos = cpu+1; - return &per_cpu(nf_conntrack_stat, cpu); + return per_cpu_ptr(net->ct.stat, cpu); } return NULL; @@ -296,13 +303,14 @@ static void *ct_cpu_seq_start(struct seq_file *seq, loff_t *pos) static void *ct_cpu_seq_next(struct seq_file *seq, void *v, loff_t *pos) { + struct net *net = seq_file_net(seq); int cpu; for (cpu = *pos; cpu < NR_CPUS; ++cpu) { if (!cpu_possible(cpu)) continue; *pos = cpu+1; - return &per_cpu(nf_conntrack_stat, cpu); + return per_cpu_ptr(net->ct.stat, cpu); } return NULL; @@ -314,7 +322,8 @@ static void ct_cpu_seq_stop(struct seq_file *seq, void *v) static int ct_cpu_seq_show(struct seq_file *seq, void *v) { - unsigned int nr_conntracks = atomic_read(&nf_conntrack_count); + struct net *net = seq_file_net(seq); + unsigned int nr_conntracks = atomic_read(&net->ct.count); const struct ip_conntrack_stat *st = v; if (v == SEQ_START_TOKEN) { @@ -354,7 +363,8 @@ static const struct seq_operations ct_cpu_seq_ops = { static int ct_cpu_seq_open(struct inode *inode, struct file *file) { - return seq_open(file, &ct_cpu_seq_ops); + return seq_open_net(inode, file, &ct_cpu_seq_ops, + sizeof(struct seq_net_private)); } static const struct file_operations ct_cpu_seq_fops = { @@ -362,39 +372,54 @@ static const struct file_operations ct_cpu_seq_fops = { .open = ct_cpu_seq_open, .read = seq_read, .llseek = seq_lseek, - .release = seq_release, + .release = seq_release_net, }; -int __init nf_conntrack_ipv4_compat_init(void) +static int __net_init ip_conntrack_net_init(struct net *net) { struct proc_dir_entry *proc, *proc_exp, *proc_stat; - proc = proc_net_fops_create(&init_net, "ip_conntrack", 0440, &ct_file_ops); + proc = proc_net_fops_create(net, "ip_conntrack", 0440, &ct_file_ops); if (!proc) goto err1; - proc_exp = proc_net_fops_create(&init_net, "ip_conntrack_expect", 0440, + proc_exp = proc_net_fops_create(net, "ip_conntrack_expect", 0440, &ip_exp_file_ops); if (!proc_exp) goto err2; proc_stat = proc_create("ip_conntrack", S_IRUGO, - init_net.proc_net_stat, &ct_cpu_seq_fops); + net->proc_net_stat, &ct_cpu_seq_fops); if (!proc_stat) goto err3; return 0; err3: - proc_net_remove(&init_net, "ip_conntrack_expect"); + proc_net_remove(net, "ip_conntrack_expect"); err2: - proc_net_remove(&init_net, "ip_conntrack"); + proc_net_remove(net, "ip_conntrack"); err1: return -ENOMEM; } +static void __net_exit ip_conntrack_net_exit(struct net *net) +{ + remove_proc_entry("ip_conntrack", net->proc_net_stat); + proc_net_remove(net, "ip_conntrack_expect"); + proc_net_remove(net, "ip_conntrack"); +} + +static struct pernet_operations ip_conntrack_net_ops = { + .init = ip_conntrack_net_init, + .exit = ip_conntrack_net_exit, +}; + +int __init nf_conntrack_ipv4_compat_init(void) +{ + return register_pernet_subsys(&ip_conntrack_net_ops); +} + void __exit nf_conntrack_ipv4_compat_fini(void) { - remove_proc_entry("ip_conntrack", init_net.proc_net_stat); - proc_net_remove(&init_net, "ip_conntrack_expect"); - proc_net_remove(&init_net, "ip_conntrack"); + unregister_pernet_subsys(&ip_conntrack_net_ops); } diff --git a/net/ipv4/netfilter/nf_conntrack_proto_icmp.c b/net/ipv4/netfilter/nf_conntrack_proto_icmp.c index 97791048fa9b..4e8879220222 100644 --- a/net/ipv4/netfilter/nf_conntrack_proto_icmp.c +++ b/net/ipv4/netfilter/nf_conntrack_proto_icmp.c @@ -79,7 +79,7 @@ static int icmp_packet(struct nf_conn *ct, const struct sk_buff *skb, unsigned int dataoff, enum ip_conntrack_info ctinfo, - int pf, + u_int8_t pf, unsigned int hooknum) { /* Try to delete connection immediately after all replies: @@ -91,7 +91,7 @@ static int icmp_packet(struct nf_conn *ct, nf_ct_kill_acct(ct, ctinfo, skb); } else { atomic_inc(&ct->proto.icmp.count); - nf_conntrack_event_cache(IPCT_PROTOINFO_VOLATILE, skb); + nf_conntrack_event_cache(IPCT_PROTOINFO_VOLATILE, ct); nf_ct_refresh_acct(ct, ctinfo, skb, nf_ct_icmp_timeout); } @@ -123,7 +123,7 @@ static bool icmp_new(struct nf_conn *ct, const struct sk_buff *skb, /* Returns conntrack if it dealt with ICMP, and filled in skb fields */ static int -icmp_error_message(struct sk_buff *skb, +icmp_error_message(struct net *net, struct sk_buff *skb, enum ip_conntrack_info *ctinfo, unsigned int hooknum) { @@ -155,7 +155,7 @@ icmp_error_message(struct sk_buff *skb, *ctinfo = IP_CT_RELATED; - h = nf_conntrack_find_get(&innertuple); + h = nf_conntrack_find_get(net, &innertuple); if (!h) { pr_debug("icmp_error_message: no match\n"); return -NF_ACCEPT; @@ -172,8 +172,8 @@ icmp_error_message(struct sk_buff *skb, /* Small and modified version of icmp_rcv */ static int -icmp_error(struct sk_buff *skb, unsigned int dataoff, - enum ip_conntrack_info *ctinfo, int pf, unsigned int hooknum) +icmp_error(struct net *net, struct sk_buff *skb, unsigned int dataoff, + enum ip_conntrack_info *ctinfo, u_int8_t pf, unsigned int hooknum) { const struct icmphdr *icmph; struct icmphdr _ih; @@ -181,16 +181,16 @@ icmp_error(struct sk_buff *skb, unsigned int dataoff, /* Not enough header? */ icmph = skb_header_pointer(skb, ip_hdrlen(skb), sizeof(_ih), &_ih); if (icmph == NULL) { - if (LOG_INVALID(IPPROTO_ICMP)) + if (LOG_INVALID(net, IPPROTO_ICMP)) nf_log_packet(PF_INET, 0, skb, NULL, NULL, NULL, "nf_ct_icmp: short packet "); return -NF_ACCEPT; } /* See ip_conntrack_proto_tcp.c */ - if (nf_conntrack_checksum && hooknum == NF_INET_PRE_ROUTING && + if (net->ct.sysctl_checksum && hooknum == NF_INET_PRE_ROUTING && nf_ip_checksum(skb, hooknum, dataoff, 0)) { - if (LOG_INVALID(IPPROTO_ICMP)) + if (LOG_INVALID(net, IPPROTO_ICMP)) nf_log_packet(PF_INET, 0, skb, NULL, NULL, NULL, "nf_ct_icmp: bad HW ICMP checksum "); return -NF_ACCEPT; @@ -203,7 +203,7 @@ icmp_error(struct sk_buff *skb, unsigned int dataoff, * discarded. */ if (icmph->type > NR_ICMP_TYPES) { - if (LOG_INVALID(IPPROTO_ICMP)) + if (LOG_INVALID(net, IPPROTO_ICMP)) nf_log_packet(PF_INET, 0, skb, NULL, NULL, NULL, "nf_ct_icmp: invalid ICMP type "); return -NF_ACCEPT; @@ -217,7 +217,7 @@ icmp_error(struct sk_buff *skb, unsigned int dataoff, && icmph->type != ICMP_REDIRECT) return NF_ACCEPT; - return icmp_error_message(skb, ctinfo, hooknum); + return icmp_error_message(net, skb, ctinfo, hooknum); } #if defined(CONFIG_NF_CT_NETLINK) || defined(CONFIG_NF_CT_NETLINK_MODULE) diff --git a/net/ipv4/netfilter/nf_defrag_ipv4.c b/net/ipv4/netfilter/nf_defrag_ipv4.c new file mode 100644 index 000000000000..fa2d6b6fc3e5 --- /dev/null +++ b/net/ipv4/netfilter/nf_defrag_ipv4.c @@ -0,0 +1,97 @@ +/* (C) 1999-2001 Paul `Rusty' Russell + * (C) 2002-2004 Netfilter Core Team <coreteam@netfilter.org> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#include <linux/types.h> +#include <linux/ip.h> +#include <linux/netfilter.h> +#include <linux/module.h> +#include <linux/skbuff.h> +#include <net/route.h> +#include <net/ip.h> + +#include <linux/netfilter_ipv4.h> +#include <net/netfilter/ipv4/nf_defrag_ipv4.h> + +/* Returns new sk_buff, or NULL */ +static int nf_ct_ipv4_gather_frags(struct sk_buff *skb, u_int32_t user) +{ + int err; + + skb_orphan(skb); + + local_bh_disable(); + err = ip_defrag(skb, user); + local_bh_enable(); + + if (!err) + ip_send_check(ip_hdr(skb)); + + return err; +} + +static unsigned int ipv4_conntrack_defrag(unsigned int hooknum, + struct sk_buff *skb, + const struct net_device *in, + const struct net_device *out, + int (*okfn)(struct sk_buff *)) +{ +#if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE) +#if !defined(CONFIG_NF_NAT) && !defined(CONFIG_NF_NAT_MODULE) + /* Previously seen (loopback)? Ignore. Do this before + fragment check. */ + if (skb->nfct) + return NF_ACCEPT; +#endif +#endif + /* Gather fragments. */ + if (ip_hdr(skb)->frag_off & htons(IP_MF | IP_OFFSET)) { + if (nf_ct_ipv4_gather_frags(skb, + hooknum == NF_INET_PRE_ROUTING ? + IP_DEFRAG_CONNTRACK_IN : + IP_DEFRAG_CONNTRACK_OUT)) + return NF_STOLEN; + } + return NF_ACCEPT; +} + +static struct nf_hook_ops ipv4_defrag_ops[] = { + { + .hook = ipv4_conntrack_defrag, + .owner = THIS_MODULE, + .pf = PF_INET, + .hooknum = NF_INET_PRE_ROUTING, + .priority = NF_IP_PRI_CONNTRACK_DEFRAG, + }, + { + .hook = ipv4_conntrack_defrag, + .owner = THIS_MODULE, + .pf = PF_INET, + .hooknum = NF_INET_LOCAL_OUT, + .priority = NF_IP_PRI_CONNTRACK_DEFRAG, + }, +}; + +static int __init nf_defrag_init(void) +{ + return nf_register_hooks(ipv4_defrag_ops, ARRAY_SIZE(ipv4_defrag_ops)); +} + +static void __exit nf_defrag_fini(void) +{ + nf_unregister_hooks(ipv4_defrag_ops, ARRAY_SIZE(ipv4_defrag_ops)); +} + +void nf_defrag_ipv4_enable(void) +{ +} +EXPORT_SYMBOL_GPL(nf_defrag_ipv4_enable); + +module_init(nf_defrag_init); +module_exit(nf_defrag_fini); + +MODULE_LICENSE("GPL"); diff --git a/net/ipv4/netfilter/nf_nat_core.c b/net/ipv4/netfilter/nf_nat_core.c index 6c6a3cba8d50..a65cf692359f 100644 --- a/net/ipv4/netfilter/nf_nat_core.c +++ b/net/ipv4/netfilter/nf_nat_core.c @@ -37,9 +37,6 @@ static struct nf_conntrack_l3proto *l3proto __read_mostly; /* Calculated at init based on memory size */ static unsigned int nf_nat_htable_size __read_mostly; -static int nf_nat_vmalloced; - -static struct hlist_head *bysource __read_mostly; #define MAX_IP_NAT_PROTO 256 static const struct nf_nat_protocol *nf_nat_protos[MAX_IP_NAT_PROTO] @@ -145,7 +142,8 @@ same_src(const struct nf_conn *ct, /* Only called for SRC manip */ static int -find_appropriate_src(const struct nf_conntrack_tuple *tuple, +find_appropriate_src(struct net *net, + const struct nf_conntrack_tuple *tuple, struct nf_conntrack_tuple *result, const struct nf_nat_range *range) { @@ -155,7 +153,7 @@ find_appropriate_src(const struct nf_conntrack_tuple *tuple, const struct hlist_node *n; rcu_read_lock(); - hlist_for_each_entry_rcu(nat, n, &bysource[h], bysource) { + hlist_for_each_entry_rcu(nat, n, &net->ipv4.nat_bysource[h], bysource) { ct = nat->ct; if (same_src(ct, tuple)) { /* Copy source part from reply tuple. */ @@ -231,6 +229,7 @@ get_unique_tuple(struct nf_conntrack_tuple *tuple, struct nf_conn *ct, enum nf_nat_manip_type maniptype) { + struct net *net = nf_ct_net(ct); const struct nf_nat_protocol *proto; /* 1) If this srcip/proto/src-proto-part is currently mapped, @@ -242,7 +241,7 @@ get_unique_tuple(struct nf_conntrack_tuple *tuple, manips not an issue. */ if (maniptype == IP_NAT_MANIP_SRC && !(range->flags & IP_NAT_RANGE_PROTO_RANDOM)) { - if (find_appropriate_src(orig_tuple, tuple, range)) { + if (find_appropriate_src(net, orig_tuple, tuple, range)) { pr_debug("get_unique_tuple: Found current src map\n"); if (!nf_nat_used_tuple(tuple, ct)) return; @@ -283,6 +282,7 @@ nf_nat_setup_info(struct nf_conn *ct, const struct nf_nat_range *range, enum nf_nat_manip_type maniptype) { + struct net *net = nf_ct_net(ct); struct nf_conntrack_tuple curr_tuple, new_tuple; struct nf_conn_nat *nat; int have_to_hash = !(ct->status & IPS_NAT_DONE_MASK); @@ -334,7 +334,8 @@ nf_nat_setup_info(struct nf_conn *ct, /* nf_conntrack_alter_reply might re-allocate exntension aera */ nat = nfct_nat(ct); nat->ct = ct; - hlist_add_head_rcu(&nat->bysource, &bysource[srchash]); + hlist_add_head_rcu(&nat->bysource, + &net->ipv4.nat_bysource[srchash]); spin_unlock_bh(&nf_nat_lock); } @@ -583,6 +584,132 @@ static struct nf_ct_ext_type nat_extend __read_mostly = { .flags = NF_CT_EXT_F_PREALLOC, }; +#if defined(CONFIG_NF_CT_NETLINK) || defined(CONFIG_NF_CT_NETLINK_MODULE) + +#include <linux/netfilter/nfnetlink.h> +#include <linux/netfilter/nfnetlink_conntrack.h> + +static const struct nla_policy protonat_nla_policy[CTA_PROTONAT_MAX+1] = { + [CTA_PROTONAT_PORT_MIN] = { .type = NLA_U16 }, + [CTA_PROTONAT_PORT_MAX] = { .type = NLA_U16 }, +}; + +static int nfnetlink_parse_nat_proto(struct nlattr *attr, + const struct nf_conn *ct, + struct nf_nat_range *range) +{ + struct nlattr *tb[CTA_PROTONAT_MAX+1]; + const struct nf_nat_protocol *npt; + int err; + + err = nla_parse_nested(tb, CTA_PROTONAT_MAX, attr, protonat_nla_policy); + if (err < 0) + return err; + + npt = nf_nat_proto_find_get(nf_ct_protonum(ct)); + if (npt->nlattr_to_range) + err = npt->nlattr_to_range(tb, range); + nf_nat_proto_put(npt); + return err; +} + +static const struct nla_policy nat_nla_policy[CTA_NAT_MAX+1] = { + [CTA_NAT_MINIP] = { .type = NLA_U32 }, + [CTA_NAT_MAXIP] = { .type = NLA_U32 }, +}; + +static int +nfnetlink_parse_nat(struct nlattr *nat, + const struct nf_conn *ct, struct nf_nat_range *range) +{ + struct nlattr *tb[CTA_NAT_MAX+1]; + int err; + + memset(range, 0, sizeof(*range)); + + err = nla_parse_nested(tb, CTA_NAT_MAX, nat, nat_nla_policy); + if (err < 0) + return err; + + if (tb[CTA_NAT_MINIP]) + range->min_ip = nla_get_be32(tb[CTA_NAT_MINIP]); + + if (!tb[CTA_NAT_MAXIP]) + range->max_ip = range->min_ip; + else + range->max_ip = nla_get_be32(tb[CTA_NAT_MAXIP]); + + if (range->min_ip) + range->flags |= IP_NAT_RANGE_MAP_IPS; + + if (!tb[CTA_NAT_PROTO]) + return 0; + + err = nfnetlink_parse_nat_proto(tb[CTA_NAT_PROTO], ct, range); + if (err < 0) + return err; + + return 0; +} + +static int +nfnetlink_parse_nat_setup(struct nf_conn *ct, + enum nf_nat_manip_type manip, + struct nlattr *attr) +{ + struct nf_nat_range range; + + if (nfnetlink_parse_nat(attr, ct, &range) < 0) + return -EINVAL; + if (nf_nat_initialized(ct, manip)) + return -EEXIST; + + return nf_nat_setup_info(ct, &range, manip); +} +#else +static int +nfnetlink_parse_nat_setup(struct nf_conn *ct, + enum nf_nat_manip_type manip, + struct nlattr *attr) +{ + return -EOPNOTSUPP; +} +#endif + +static int __net_init nf_nat_net_init(struct net *net) +{ + net->ipv4.nat_bysource = nf_ct_alloc_hashtable(&nf_nat_htable_size, + &net->ipv4.nat_vmalloced); + if (!net->ipv4.nat_bysource) + return -ENOMEM; + return 0; +} + +/* Clear NAT section of all conntracks, in case we're loaded again. */ +static int clean_nat(struct nf_conn *i, void *data) +{ + struct nf_conn_nat *nat = nfct_nat(i); + + if (!nat) + return 0; + memset(nat, 0, sizeof(*nat)); + i->status &= ~(IPS_NAT_MASK | IPS_NAT_DONE_MASK | IPS_SEQ_ADJUST); + return 0; +} + +static void __net_exit nf_nat_net_exit(struct net *net) +{ + nf_ct_iterate_cleanup(net, &clean_nat, NULL); + synchronize_rcu(); + nf_ct_free_hashtable(net->ipv4.nat_bysource, net->ipv4.nat_vmalloced, + nf_nat_htable_size); +} + +static struct pernet_operations nf_nat_net_ops = { + .init = nf_nat_net_init, + .exit = nf_nat_net_exit, +}; + static int __init nf_nat_init(void) { size_t i; @@ -599,12 +726,9 @@ static int __init nf_nat_init(void) /* Leave them the same for the moment. */ nf_nat_htable_size = nf_conntrack_htable_size; - bysource = nf_ct_alloc_hashtable(&nf_nat_htable_size, - &nf_nat_vmalloced); - if (!bysource) { - ret = -ENOMEM; + ret = register_pernet_subsys(&nf_nat_net_ops); + if (ret < 0) goto cleanup_extend; - } /* Sew in builtin protocols. */ spin_lock_bh(&nf_nat_lock); @@ -622,6 +746,9 @@ static int __init nf_nat_init(void) BUG_ON(nf_nat_seq_adjust_hook != NULL); rcu_assign_pointer(nf_nat_seq_adjust_hook, nf_nat_seq_adjust); + BUG_ON(nfnetlink_parse_nat_setup_hook != NULL); + rcu_assign_pointer(nfnetlink_parse_nat_setup_hook, + nfnetlink_parse_nat_setup); return 0; cleanup_extend: @@ -629,30 +756,18 @@ static int __init nf_nat_init(void) return ret; } -/* Clear NAT section of all conntracks, in case we're loaded again. */ -static int clean_nat(struct nf_conn *i, void *data) -{ - struct nf_conn_nat *nat = nfct_nat(i); - - if (!nat) - return 0; - memset(nat, 0, sizeof(*nat)); - i->status &= ~(IPS_NAT_MASK | IPS_NAT_DONE_MASK | IPS_SEQ_ADJUST); - return 0; -} - static void __exit nf_nat_cleanup(void) { - nf_ct_iterate_cleanup(&clean_nat, NULL); - synchronize_rcu(); - nf_ct_free_hashtable(bysource, nf_nat_vmalloced, nf_nat_htable_size); + unregister_pernet_subsys(&nf_nat_net_ops); nf_ct_l3proto_put(l3proto); nf_ct_extend_unregister(&nat_extend); rcu_assign_pointer(nf_nat_seq_adjust_hook, NULL); + rcu_assign_pointer(nfnetlink_parse_nat_setup_hook, NULL); synchronize_net(); } MODULE_LICENSE("GPL"); +MODULE_ALIAS("nf-nat-ipv4"); module_init(nf_nat_init); module_exit(nf_nat_cleanup); diff --git a/net/ipv4/netfilter/nf_nat_helper.c b/net/ipv4/netfilter/nf_nat_helper.c index 11976ea29884..cf7a42bf9820 100644 --- a/net/ipv4/netfilter/nf_nat_helper.c +++ b/net/ipv4/netfilter/nf_nat_helper.c @@ -16,6 +16,7 @@ #include <linux/udp.h> #include <net/checksum.h> #include <net/tcp.h> +#include <net/route.h> #include <linux/netfilter_ipv4.h> #include <net/netfilter/nf_conntrack.h> @@ -192,7 +193,7 @@ nf_nat_mangle_tcp_packet(struct sk_buff *skb, nf_conntrack_tcp_update(skb, ip_hdrlen(skb), ct, CTINFO2DIR(ctinfo)); - nf_conntrack_event_cache(IPCT_NATSEQADJ, skb); + nf_conntrack_event_cache(IPCT_NATSEQADJ, ct); } return 1; } diff --git a/net/ipv4/netfilter/nf_nat_pptp.c b/net/ipv4/netfilter/nf_nat_pptp.c index da3d91a5ef5c..9eb171056c63 100644 --- a/net/ipv4/netfilter/nf_nat_pptp.c +++ b/net/ipv4/netfilter/nf_nat_pptp.c @@ -40,6 +40,7 @@ MODULE_ALIAS("ip_nat_pptp"); static void pptp_nat_expected(struct nf_conn *ct, struct nf_conntrack_expect *exp) { + struct net *net = nf_ct_net(ct); const struct nf_conn *master = ct->master; struct nf_conntrack_expect *other_exp; struct nf_conntrack_tuple t; @@ -73,7 +74,7 @@ static void pptp_nat_expected(struct nf_conn *ct, pr_debug("trying to unexpect other dir: "); nf_ct_dump_tuple_ip(&t); - other_exp = nf_ct_expect_find_get(&t); + other_exp = nf_ct_expect_find_get(net, &t); if (other_exp) { nf_ct_unexpect_related(other_exp); nf_ct_expect_put(other_exp); diff --git a/net/ipv4/netfilter/nf_nat_rule.c b/net/ipv4/netfilter/nf_nat_rule.c index e8b4d0d4439e..bea54a685109 100644 --- a/net/ipv4/netfilter/nf_nat_rule.c +++ b/net/ipv4/netfilter/nf_nat_rule.c @@ -33,7 +33,7 @@ static struct struct ipt_replace repl; struct ipt_standard entries[3]; struct ipt_error term; -} nat_initial_table __initdata = { +} nat_initial_table __net_initdata = { .repl = { .name = "nat", .valid_hooks = NAT_VALID_HOOKS, @@ -58,47 +58,42 @@ static struct .term = IPT_ERROR_INIT, /* ERROR */ }; -static struct xt_table __nat_table = { +static struct xt_table nat_table = { .name = "nat", .valid_hooks = NAT_VALID_HOOKS, .lock = __RW_LOCK_UNLOCKED(__nat_table.lock), .me = THIS_MODULE, .af = AF_INET, }; -static struct xt_table *nat_table; /* Source NAT */ -static unsigned int ipt_snat_target(struct sk_buff *skb, - const struct net_device *in, - const struct net_device *out, - unsigned int hooknum, - const struct xt_target *target, - const void *targinfo) +static unsigned int +ipt_snat_target(struct sk_buff *skb, const struct xt_target_param *par) { struct nf_conn *ct; enum ip_conntrack_info ctinfo; - const struct nf_nat_multi_range_compat *mr = targinfo; + const struct nf_nat_multi_range_compat *mr = par->targinfo; - NF_CT_ASSERT(hooknum == NF_INET_POST_ROUTING); + NF_CT_ASSERT(par->hooknum == NF_INET_POST_ROUTING); ct = nf_ct_get(skb, &ctinfo); /* Connection must be valid and new. */ NF_CT_ASSERT(ct && (ctinfo == IP_CT_NEW || ctinfo == IP_CT_RELATED || ctinfo == IP_CT_RELATED + IP_CT_IS_REPLY)); - NF_CT_ASSERT(out); + NF_CT_ASSERT(par->out != NULL); return nf_nat_setup_info(ct, &mr->range[0], IP_NAT_MANIP_SRC); } /* Before 2.6.11 we did implicit source NAT if required. Warn about change. */ -static void warn_if_extra_mangle(__be32 dstip, __be32 srcip) +static void warn_if_extra_mangle(struct net *net, __be32 dstip, __be32 srcip) { static int warned = 0; struct flowi fl = { .nl_u = { .ip4_u = { .daddr = dstip } } }; struct rtable *rt; - if (ip_route_output_key(&init_net, &rt, &fl) != 0) + if (ip_route_output_key(net, &rt, &fl) != 0) return; if (rt->rt_src != srcip && !warned) { @@ -110,40 +105,32 @@ static void warn_if_extra_mangle(__be32 dstip, __be32 srcip) ip_rt_put(rt); } -static unsigned int ipt_dnat_target(struct sk_buff *skb, - const struct net_device *in, - const struct net_device *out, - unsigned int hooknum, - const struct xt_target *target, - const void *targinfo) +static unsigned int +ipt_dnat_target(struct sk_buff *skb, const struct xt_target_param *par) { struct nf_conn *ct; enum ip_conntrack_info ctinfo; - const struct nf_nat_multi_range_compat *mr = targinfo; + const struct nf_nat_multi_range_compat *mr = par->targinfo; - NF_CT_ASSERT(hooknum == NF_INET_PRE_ROUTING || - hooknum == NF_INET_LOCAL_OUT); + NF_CT_ASSERT(par->hooknum == NF_INET_PRE_ROUTING || + par->hooknum == NF_INET_LOCAL_OUT); ct = nf_ct_get(skb, &ctinfo); /* Connection must be valid and new. */ NF_CT_ASSERT(ct && (ctinfo == IP_CT_NEW || ctinfo == IP_CT_RELATED)); - if (hooknum == NF_INET_LOCAL_OUT && + if (par->hooknum == NF_INET_LOCAL_OUT && mr->range[0].flags & IP_NAT_RANGE_MAP_IPS) - warn_if_extra_mangle(ip_hdr(skb)->daddr, + warn_if_extra_mangle(dev_net(par->out), ip_hdr(skb)->daddr, mr->range[0].min_ip); return nf_nat_setup_info(ct, &mr->range[0], IP_NAT_MANIP_DST); } -static bool ipt_snat_checkentry(const char *tablename, - const void *entry, - const struct xt_target *target, - void *targinfo, - unsigned int hook_mask) +static bool ipt_snat_checkentry(const struct xt_tgchk_param *par) { - const struct nf_nat_multi_range_compat *mr = targinfo; + const struct nf_nat_multi_range_compat *mr = par->targinfo; /* Must be a valid range */ if (mr->rangesize != 1) { @@ -153,13 +140,9 @@ static bool ipt_snat_checkentry(const char *tablename, return true; } -static bool ipt_dnat_checkentry(const char *tablename, - const void *entry, - const struct xt_target *target, - void *targinfo, - unsigned int hook_mask) +static bool ipt_dnat_checkentry(const struct xt_tgchk_param *par) { - const struct nf_nat_multi_range_compat *mr = targinfo; + const struct nf_nat_multi_range_compat *mr = par->targinfo; /* Must be a valid range */ if (mr->rangesize != 1) { @@ -194,9 +177,10 @@ int nf_nat_rule_find(struct sk_buff *skb, const struct net_device *out, struct nf_conn *ct) { + struct net *net = nf_ct_net(ct); int ret; - ret = ipt_do_table(skb, hooknum, in, out, nat_table); + ret = ipt_do_table(skb, hooknum, in, out, net->ipv4.nat_table); if (ret == NF_ACCEPT) { if (!nf_nat_initialized(ct, HOOK2MANIP(hooknum))) @@ -226,14 +210,32 @@ static struct xt_target ipt_dnat_reg __read_mostly = { .family = AF_INET, }; +static int __net_init nf_nat_rule_net_init(struct net *net) +{ + net->ipv4.nat_table = ipt_register_table(net, &nat_table, + &nat_initial_table.repl); + if (IS_ERR(net->ipv4.nat_table)) + return PTR_ERR(net->ipv4.nat_table); + return 0; +} + +static void __net_exit nf_nat_rule_net_exit(struct net *net) +{ + ipt_unregister_table(net->ipv4.nat_table); +} + +static struct pernet_operations nf_nat_rule_net_ops = { + .init = nf_nat_rule_net_init, + .exit = nf_nat_rule_net_exit, +}; + int __init nf_nat_rule_init(void) { int ret; - nat_table = ipt_register_table(&init_net, &__nat_table, - &nat_initial_table.repl); - if (IS_ERR(nat_table)) - return PTR_ERR(nat_table); + ret = register_pernet_subsys(&nf_nat_rule_net_ops); + if (ret != 0) + goto out; ret = xt_register_target(&ipt_snat_reg); if (ret != 0) goto unregister_table; @@ -247,8 +249,8 @@ int __init nf_nat_rule_init(void) unregister_snat: xt_unregister_target(&ipt_snat_reg); unregister_table: - ipt_unregister_table(nat_table); - + unregister_pernet_subsys(&nf_nat_rule_net_ops); + out: return ret; } @@ -256,5 +258,5 @@ void nf_nat_rule_cleanup(void) { xt_unregister_target(&ipt_dnat_reg); xt_unregister_target(&ipt_snat_reg); - ipt_unregister_table(nat_table); + unregister_pernet_subsys(&nf_nat_rule_net_ops); } diff --git a/net/ipv4/route.c b/net/ipv4/route.c index 6ee5354c9aa1..942be04e7955 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -282,6 +282,8 @@ static struct rtable *rt_cache_get_first(struct seq_file *seq) struct rtable *r = NULL; for (st->bucket = rt_hash_mask; st->bucket >= 0; --st->bucket) { + if (!rt_hash_table[st->bucket].chain) + continue; rcu_read_lock_bh(); r = rcu_dereference(rt_hash_table[st->bucket].chain); while (r) { @@ -299,11 +301,14 @@ static struct rtable *__rt_cache_get_next(struct seq_file *seq, struct rtable *r) { struct rt_cache_iter_state *st = seq->private; + r = r->u.dst.rt_next; while (!r) { rcu_read_unlock_bh(); - if (--st->bucket < 0) - break; + do { + if (--st->bucket < 0) + return NULL; + } while (!rt_hash_table[st->bucket].chain); rcu_read_lock_bh(); r = rt_hash_table[st->bucket].chain; } @@ -2356,11 +2361,6 @@ static int ip_route_output_slow(struct net *net, struct rtable **rp, ipv4_is_zeronet(oldflp->fl4_src)) goto out; - /* It is equivalent to inet_addr_type(saddr) == RTN_LOCAL */ - dev_out = ip_dev_find(net, oldflp->fl4_src); - if (dev_out == NULL) - goto out; - /* I removed check for oif == dev_out->oif here. It was wrong for two reasons: 1. ip_dev_find(net, saddr) can return wrong iface, if saddr @@ -2372,6 +2372,11 @@ static int ip_route_output_slow(struct net *net, struct rtable **rp, if (oldflp->oif == 0 && (ipv4_is_multicast(oldflp->fl4_dst) || oldflp->fl4_dst == htonl(0xFFFFFFFF))) { + /* It is equivalent to inet_addr_type(saddr) == RTN_LOCAL */ + dev_out = ip_dev_find(net, oldflp->fl4_src); + if (dev_out == NULL) + goto out; + /* Special hack: user can direct multicasts and limited broadcast via necessary interface without fiddling with IP_MULTICAST_IF or IP_PKTINFO. @@ -2390,9 +2395,15 @@ static int ip_route_output_slow(struct net *net, struct rtable **rp, fl.oif = dev_out->ifindex; goto make_route; } - if (dev_out) + + if (!(oldflp->flags & FLOWI_FLAG_ANYSRC)) { + /* It is equivalent to inet_addr_type(saddr) == RTN_LOCAL */ + dev_out = ip_dev_find(net, oldflp->fl4_src); + if (dev_out == NULL) + goto out; dev_put(dev_out); - dev_out = NULL; + dev_out = NULL; + } } @@ -2840,7 +2851,9 @@ int ip_rt_dump(struct sk_buff *skb, struct netlink_callback *cb) if (s_h < 0) s_h = 0; s_idx = idx = cb->args[1]; - for (h = s_h; h <= rt_hash_mask; h++) { + for (h = s_h; h <= rt_hash_mask; h++, s_idx = 0) { + if (!rt_hash_table[h].chain) + continue; rcu_read_lock_bh(); for (rt = rcu_dereference(rt_hash_table[h].chain), idx = 0; rt; rt = rcu_dereference(rt->u.dst.rt_next), idx++) { @@ -2859,7 +2872,6 @@ int ip_rt_dump(struct sk_buff *skb, struct netlink_callback *cb) dst_release(xchg(&skb->dst, NULL)); } rcu_read_unlock_bh(); - s_idx = 0; } done: @@ -2896,8 +2908,6 @@ static int ipv4_sysctl_rtcache_flush(ctl_table *__ctl, int write, } static int ipv4_sysctl_rtcache_flush_strategy(ctl_table *table, - int __user *name, - int nlen, void __user *oldval, size_t __user *oldlenp, void __user *newval, @@ -2960,16 +2970,13 @@ static int ipv4_sysctl_rt_secret_interval(ctl_table *ctl, int write, } static int ipv4_sysctl_rt_secret_interval_strategy(ctl_table *table, - int __user *name, - int nlen, void __user *oldval, size_t __user *oldlenp, void __user *newval, size_t newlen) { int old = ip_rt_secret_interval; - int ret = sysctl_jiffies(table, name, nlen, oldval, oldlenp, newval, - newlen); + int ret = sysctl_jiffies(table, oldval, oldlenp, newval, newlen); rt_secret_reschedule(old); diff --git a/net/ipv4/syncookies.c b/net/ipv4/syncookies.c index 9d38005abbac..d346c22aa6ae 100644 --- a/net/ipv4/syncookies.c +++ b/net/ipv4/syncookies.c @@ -16,6 +16,7 @@ #include <linux/cryptohash.h> #include <linux/kernel.h> #include <net/tcp.h> +#include <net/route.h> /* Timestamps: lowest 9 bits store TCP options */ #define TSBITS 9 @@ -296,6 +297,7 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb, treq->rcv_isn = ntohl(th->seq) - 1; treq->snt_isn = cookie; req->mss = mss; + ireq->loc_port = th->dest; ireq->rmt_port = th->source; ireq->loc_addr = ip_hdr(skb)->daddr; ireq->rmt_addr = ip_hdr(skb)->saddr; @@ -337,6 +339,7 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb, .saddr = ireq->loc_addr, .tos = RT_CONN_FLAGS(sk) } }, .proto = IPPROTO_TCP, + .flags = inet_sk_flowi_flags(sk), .uli_u = { .ports = { .sport = th->dest, .dport = th->source } } }; diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c index e0689fd7b798..1bb10df8ce7d 100644 --- a/net/ipv4/sysctl_net_ipv4.c +++ b/net/ipv4/sysctl_net_ipv4.c @@ -26,16 +26,13 @@ static int tcp_retr1_max = 255; static int ip_local_port_range_min[] = { 1, 1 }; static int ip_local_port_range_max[] = { 65535, 65535 }; -extern seqlock_t sysctl_port_range_lock; -extern int sysctl_local_port_range[2]; - /* Update system visible IP port range */ static void set_local_port_range(int range[2]) { - write_seqlock(&sysctl_port_range_lock); - sysctl_local_port_range[0] = range[0]; - sysctl_local_port_range[1] = range[1]; - write_sequnlock(&sysctl_port_range_lock); + write_seqlock(&sysctl_local_ports.lock); + sysctl_local_ports.range[0] = range[0]; + sysctl_local_ports.range[1] = range[1]; + write_sequnlock(&sysctl_local_ports.lock); } /* Validate changes from /proc interface. */ @@ -44,8 +41,7 @@ static int ipv4_local_port_range(ctl_table *table, int write, struct file *filp, size_t *lenp, loff_t *ppos) { int ret; - int range[2] = { sysctl_local_port_range[0], - sysctl_local_port_range[1] }; + int range[2]; ctl_table tmp = { .data = &range, .maxlen = sizeof(range), @@ -54,6 +50,7 @@ static int ipv4_local_port_range(ctl_table *table, int write, struct file *filp, .extra2 = &ip_local_port_range_max, }; + inet_get_local_port_range(range, range + 1); ret = proc_dointvec_minmax(&tmp, write, filp, buffer, lenp, ppos); if (write && ret == 0) { @@ -67,14 +64,13 @@ static int ipv4_local_port_range(ctl_table *table, int write, struct file *filp, } /* Validate changes from sysctl interface. */ -static int ipv4_sysctl_local_port_range(ctl_table *table, int __user *name, - int nlen, void __user *oldval, +static int ipv4_sysctl_local_port_range(ctl_table *table, + void __user *oldval, size_t __user *oldlenp, void __user *newval, size_t newlen) { int ret; - int range[2] = { sysctl_local_port_range[0], - sysctl_local_port_range[1] }; + int range[2]; ctl_table tmp = { .data = &range, .maxlen = sizeof(range), @@ -83,7 +79,8 @@ static int ipv4_sysctl_local_port_range(ctl_table *table, int __user *name, .extra2 = &ip_local_port_range_max, }; - ret = sysctl_intvec(&tmp, name, nlen, oldval, oldlenp, newval, newlen); + inet_get_local_port_range(range, range + 1); + ret = sysctl_intvec(&tmp, oldval, oldlenp, newval, newlen); if (ret == 0 && newval && newlen) { if (range[1] < range[0]) ret = -EINVAL; @@ -112,8 +109,8 @@ static int proc_tcp_congestion_control(ctl_table *ctl, int write, struct file * return ret; } -static int sysctl_tcp_congestion_control(ctl_table *table, int __user *name, - int nlen, void __user *oldval, +static int sysctl_tcp_congestion_control(ctl_table *table, + void __user *oldval, size_t __user *oldlenp, void __user *newval, size_t newlen) { @@ -125,7 +122,7 @@ static int sysctl_tcp_congestion_control(ctl_table *table, int __user *name, int ret; tcp_get_default_congestion_control(val); - ret = sysctl_string(&tbl, name, nlen, oldval, oldlenp, newval, newlen); + ret = sysctl_string(&tbl, oldval, oldlenp, newval, newlen); if (ret == 1 && newval && newlen) ret = tcp_set_default_congestion_control(val); return ret; @@ -168,8 +165,8 @@ static int proc_allowed_congestion_control(ctl_table *ctl, return ret; } -static int strategy_allowed_congestion_control(ctl_table *table, int __user *name, - int nlen, void __user *oldval, +static int strategy_allowed_congestion_control(ctl_table *table, + void __user *oldval, size_t __user *oldlenp, void __user *newval, size_t newlen) @@ -182,7 +179,7 @@ static int strategy_allowed_congestion_control(ctl_table *table, int __user *nam return -ENOMEM; tcp_get_available_congestion_control(tbl.data, tbl.maxlen); - ret = sysctl_string(&tbl, name, nlen, oldval, oldlenp, newval, newlen); + ret = sysctl_string(&tbl, oldval, oldlenp, newval, newlen); if (ret == 1 && newval && newlen) ret = tcp_set_allowed_congestion_control(tbl.data); kfree(tbl.data); @@ -396,8 +393,8 @@ static struct ctl_table ipv4_table[] = { { .ctl_name = NET_IPV4_LOCAL_PORT_RANGE, .procname = "ip_local_port_range", - .data = &sysctl_local_port_range, - .maxlen = sizeof(sysctl_local_port_range), + .data = &sysctl_local_ports.range, + .maxlen = sizeof(sysctl_local_ports.range), .mode = 0644, .proc_handler = &ipv4_local_port_range, .strategy = &ipv4_sysctl_local_port_range, diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 1ab341e5d3e0..eccb7165a80c 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -384,13 +384,17 @@ unsigned int tcp_poll(struct file *file, struct socket *sock, poll_table *wait) /* Connected? */ if ((1 << sk->sk_state) & ~(TCPF_SYN_SENT | TCPF_SYN_RECV)) { + int target = sock_rcvlowat(sk, 0, INT_MAX); + + if (tp->urg_seq == tp->copied_seq && + !sock_flag(sk, SOCK_URGINLINE) && + tp->urg_data) + target--; + /* Potential race condition. If read of tp below will * escape above sk->sk_state, we can be illegally awaken * in SYN_* states. */ - if ((tp->rcv_nxt != tp->copied_seq) && - (tp->urg_seq != tp->copied_seq || - tp->rcv_nxt != tp->copied_seq + 1 || - sock_flag(sk, SOCK_URGINLINE) || !tp->urg_data)) + if (tp->rcv_nxt - tp->copied_seq >= target) mask |= POLLIN | POLLRDNORM; if (!(sk->sk_shutdown & SEND_SHUTDOWN)) { @@ -493,10 +497,8 @@ static inline void skb_entail(struct sock *sk, struct sk_buff *skb) static inline void tcp_mark_urg(struct tcp_sock *tp, int flags, struct sk_buff *skb) { - if (flags & MSG_OOB) { - tp->urg_mode = 1; + if (flags & MSG_OOB) tp->snd_up = tp->write_seq; - } } static inline void tcp_push(struct sock *sk, int flags, int mss_now, @@ -1157,7 +1159,7 @@ static void tcp_prequeue_process(struct sock *sk) * necessary */ local_bh_disable(); while ((skb = __skb_dequeue(&tp->ucopy.prequeue)) != NULL) - sk->sk_backlog_rcv(sk, skb); + sk_backlog_rcv(sk, skb); local_bh_enable(); /* Clear memory counter. */ diff --git a/net/ipv4/tcp_hybla.c b/net/ipv4/tcp_hybla.c index bfcbd148a89d..c209e054a634 100644 --- a/net/ipv4/tcp_hybla.c +++ b/net/ipv4/tcp_hybla.c @@ -150,7 +150,11 @@ static void hybla_cong_avoid(struct sock *sk, u32 ack, u32 in_flight) ca->snd_cwnd_cents -= 128; tp->snd_cwnd_cnt = 0; } - + /* check when cwnd has not been incremented for a while */ + if (increment == 0 && odd == 0 && tp->snd_cwnd_cnt >= tp->snd_cwnd) { + tp->snd_cwnd++; + tp->snd_cwnd_cnt = 0; + } /* clamp down slowstart cwnd to ssthresh value. */ if (is_slowstart) tp->snd_cwnd = min(tp->snd_cwnd, tp->snd_ssthresh); diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 67ccce2a96bd..d77c0d29e239 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -979,6 +979,39 @@ static void tcp_update_reordering(struct sock *sk, const int metric, } } +/* This must be called before lost_out is incremented */ +static void tcp_verify_retransmit_hint(struct tcp_sock *tp, struct sk_buff *skb) +{ + if ((tp->retransmit_skb_hint == NULL) || + before(TCP_SKB_CB(skb)->seq, + TCP_SKB_CB(tp->retransmit_skb_hint)->seq)) + tp->retransmit_skb_hint = skb; + + if (!tp->lost_out || + after(TCP_SKB_CB(skb)->end_seq, tp->retransmit_high)) + tp->retransmit_high = TCP_SKB_CB(skb)->end_seq; +} + +static void tcp_skb_mark_lost(struct tcp_sock *tp, struct sk_buff *skb) +{ + if (!(TCP_SKB_CB(skb)->sacked & (TCPCB_LOST|TCPCB_SACKED_ACKED))) { + tcp_verify_retransmit_hint(tp, skb); + + tp->lost_out += tcp_skb_pcount(skb); + TCP_SKB_CB(skb)->sacked |= TCPCB_LOST; + } +} + +void tcp_skb_mark_lost_uncond_verify(struct tcp_sock *tp, struct sk_buff *skb) +{ + tcp_verify_retransmit_hint(tp, skb); + + if (!(TCP_SKB_CB(skb)->sacked & (TCPCB_LOST|TCPCB_SACKED_ACKED))) { + tp->lost_out += tcp_skb_pcount(skb); + TCP_SKB_CB(skb)->sacked |= TCPCB_LOST; + } +} + /* This procedure tags the retransmission queue when SACKs arrive. * * We have three tag bits: SACKED(S), RETRANS(R) and LOST(L). @@ -1155,13 +1188,7 @@ static void tcp_mark_lost_retrans(struct sock *sk) TCP_SKB_CB(skb)->sacked &= ~TCPCB_SACKED_RETRANS; tp->retrans_out -= tcp_skb_pcount(skb); - /* clear lost hint */ - tp->retransmit_skb_hint = NULL; - - if (!(TCP_SKB_CB(skb)->sacked & (TCPCB_LOST|TCPCB_SACKED_ACKED))) { - tp->lost_out += tcp_skb_pcount(skb); - TCP_SKB_CB(skb)->sacked |= TCPCB_LOST; - } + tcp_skb_mark_lost_uncond_verify(tp, skb); NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPLOSTRETRANSMIT); } else { if (before(ack_seq, new_low_seq)) @@ -1271,9 +1298,6 @@ static int tcp_sacktag_one(struct sk_buff *skb, struct sock *sk, ~(TCPCB_LOST|TCPCB_SACKED_RETRANS); tp->lost_out -= tcp_skb_pcount(skb); tp->retrans_out -= tcp_skb_pcount(skb); - - /* clear lost hint */ - tp->retransmit_skb_hint = NULL; } } else { if (!(sacked & TCPCB_RETRANS)) { @@ -1292,9 +1316,6 @@ static int tcp_sacktag_one(struct sk_buff *skb, struct sock *sk, if (sacked & TCPCB_LOST) { TCP_SKB_CB(skb)->sacked &= ~TCPCB_LOST; tp->lost_out -= tcp_skb_pcount(skb); - - /* clear lost hint */ - tp->retransmit_skb_hint = NULL; } } @@ -1324,7 +1345,6 @@ static int tcp_sacktag_one(struct sk_buff *skb, struct sock *sk, if (dup_sack && (TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_RETRANS)) { TCP_SKB_CB(skb)->sacked &= ~TCPCB_SACKED_RETRANS; tp->retrans_out -= tcp_skb_pcount(skb); - tp->retransmit_skb_hint = NULL; } return flag; @@ -1726,6 +1746,8 @@ int tcp_use_frto(struct sock *sk) return 0; skb = tcp_write_queue_head(sk); + if (tcp_skb_is_last(sk, skb)) + return 1; skb = tcp_write_queue_next(sk, skb); /* Skips head */ tcp_for_write_queue_from(skb, sk) { if (skb == tcp_send_head(sk)) @@ -1867,6 +1889,7 @@ static void tcp_enter_frto_loss(struct sock *sk, int allowed_segments, int flag) if (!(TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_ACKED)) { TCP_SKB_CB(skb)->sacked |= TCPCB_LOST; tp->lost_out += tcp_skb_pcount(skb); + tp->retransmit_high = TCP_SKB_CB(skb)->end_seq; } } tcp_verify_left_out(tp); @@ -1883,7 +1906,7 @@ static void tcp_enter_frto_loss(struct sock *sk, int allowed_segments, int flag) tp->high_seq = tp->snd_nxt; TCP_ECN_queue_cwr(tp); - tcp_clear_retrans_hints_partial(tp); + tcp_clear_all_retrans_hints(tp); } static void tcp_clear_retrans_partial(struct tcp_sock *tp) @@ -1934,12 +1957,11 @@ void tcp_enter_loss(struct sock *sk, int how) /* Push undo marker, if it was plain RTO and nothing * was retransmitted. */ tp->undo_marker = tp->snd_una; - tcp_clear_retrans_hints_partial(tp); } else { tp->sacked_out = 0; tp->fackets_out = 0; - tcp_clear_all_retrans_hints(tp); } + tcp_clear_all_retrans_hints(tp); tcp_for_write_queue(skb, sk) { if (skb == tcp_send_head(sk)) @@ -1952,6 +1974,7 @@ void tcp_enter_loss(struct sock *sk, int how) TCP_SKB_CB(skb)->sacked &= ~TCPCB_SACKED_ACKED; TCP_SKB_CB(skb)->sacked |= TCPCB_LOST; tp->lost_out += tcp_skb_pcount(skb); + tp->retransmit_high = TCP_SKB_CB(skb)->end_seq; } } tcp_verify_left_out(tp); @@ -2157,19 +2180,6 @@ static int tcp_time_to_recover(struct sock *sk) return 0; } -/* RFC: This is from the original, I doubt that this is necessary at all: - * clear xmit_retrans hint if seq of this skb is beyond hint. How could we - * retransmitted past LOST markings in the first place? I'm not fully sure - * about undo and end of connection cases, which can cause R without L? - */ -static void tcp_verify_retransmit_hint(struct tcp_sock *tp, struct sk_buff *skb) -{ - if ((tp->retransmit_skb_hint != NULL) && - before(TCP_SKB_CB(skb)->seq, - TCP_SKB_CB(tp->retransmit_skb_hint)->seq)) - tp->retransmit_skb_hint = NULL; -} - /* Mark head of queue up as lost. With RFC3517 SACK, the packets is * is against sacked "cnt", otherwise it's against facked "cnt" */ @@ -2217,11 +2227,7 @@ static void tcp_mark_head_lost(struct sock *sk, int packets) cnt = packets; } - if (!(TCP_SKB_CB(skb)->sacked & (TCPCB_SACKED_ACKED|TCPCB_LOST))) { - TCP_SKB_CB(skb)->sacked |= TCPCB_LOST; - tp->lost_out += tcp_skb_pcount(skb); - tcp_verify_retransmit_hint(tp, skb); - } + tcp_skb_mark_lost(tp, skb); } tcp_verify_left_out(tp); } @@ -2263,11 +2269,7 @@ static void tcp_update_scoreboard(struct sock *sk, int fast_rexmit) if (!tcp_skb_timedout(sk, skb)) break; - if (!(TCP_SKB_CB(skb)->sacked & (TCPCB_SACKED_ACKED|TCPCB_LOST))) { - TCP_SKB_CB(skb)->sacked |= TCPCB_LOST; - tp->lost_out += tcp_skb_pcount(skb); - tcp_verify_retransmit_hint(tp, skb); - } + tcp_skb_mark_lost(tp, skb); } tp->scoreboard_skb_hint = skb; @@ -2378,10 +2380,6 @@ static void tcp_undo_cwr(struct sock *sk, const int undo) } tcp_moderate_cwnd(tp); tp->snd_cwnd_stamp = tcp_time_stamp; - - /* There is something screwy going on with the retrans hints after - an undo */ - tcp_clear_all_retrans_hints(tp); } static inline int tcp_may_undo(struct tcp_sock *tp) @@ -2838,7 +2836,8 @@ static u32 tcp_tso_acked(struct sock *sk, struct sk_buff *skb) * is before the ack sequence we can discard it as it's confirmed to have * arrived at the other end. */ -static int tcp_clean_rtx_queue(struct sock *sk, int prior_fackets) +static int tcp_clean_rtx_queue(struct sock *sk, int prior_fackets, + u32 prior_snd_una) { struct tcp_sock *tp = tcp_sk(sk); const struct inet_connection_sock *icsk = inet_csk(sk); @@ -2848,6 +2847,7 @@ static int tcp_clean_rtx_queue(struct sock *sk, int prior_fackets) int flag = 0; u32 pkts_acked = 0; u32 reord = tp->packets_out; + u32 prior_sacked = tp->sacked_out; s32 seq_rtt = -1; s32 ca_seq_rtt = -1; ktime_t last_ackt = net_invalid_timestamp(); @@ -2904,9 +2904,6 @@ static int tcp_clean_rtx_queue(struct sock *sk, int prior_fackets) if (sacked & TCPCB_LOST) tp->lost_out -= acked_pcount; - if (unlikely(tp->urg_mode && !before(end_seq, tp->snd_up))) - tp->urg_mode = 0; - tp->packets_out -= acked_pcount; pkts_acked += acked_pcount; @@ -2929,9 +2926,16 @@ static int tcp_clean_rtx_queue(struct sock *sk, int prior_fackets) tcp_unlink_write_queue(skb, sk); sk_wmem_free_skb(sk, skb); - tcp_clear_all_retrans_hints(tp); + tp->scoreboard_skb_hint = NULL; + if (skb == tp->retransmit_skb_hint) + tp->retransmit_skb_hint = NULL; + if (skb == tp->lost_skb_hint) + tp->lost_skb_hint = NULL; } + if (likely(between(tp->snd_up, prior_snd_una, tp->snd_una))) + tp->snd_up = tp->snd_una; + if (skb && (TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_ACKED)) flag |= FLAG_SACK_RENEGING; @@ -2948,6 +2952,15 @@ static int tcp_clean_rtx_queue(struct sock *sk, int prior_fackets) /* Non-retransmitted hole got filled? That's reordering */ if (reord < prior_fackets) tcp_update_reordering(sk, tp->fackets_out - reord, 0); + + /* No need to care for underflows here because + * the lost_skb_hint gets NULLed if we're past it + * (or something non-trivial happened) + */ + if (tcp_is_fack(tp)) + tp->lost_cnt_hint -= pkts_acked; + else + tp->lost_cnt_hint -= prior_sacked - tp->sacked_out; } tp->fackets_out -= min(pkts_acked, tp->fackets_out); @@ -3299,7 +3312,7 @@ static int tcp_ack(struct sock *sk, struct sk_buff *skb, int flag) goto no_queue; /* See if we can take anything off of the retransmit queue. */ - flag |= tcp_clean_rtx_queue(sk, prior_fackets); + flag |= tcp_clean_rtx_queue(sk, prior_fackets, prior_snd_una); if (tp->frto_counter) frto_cwnd = tcp_process_frto(sk, flag); @@ -3442,6 +3455,22 @@ void tcp_parse_options(struct sk_buff *skb, struct tcp_options_received *opt_rx, } } +static int tcp_parse_aligned_timestamp(struct tcp_sock *tp, struct tcphdr *th) +{ + __be32 *ptr = (__be32 *)(th + 1); + + if (*ptr == htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16) + | (TCPOPT_TIMESTAMP << 8) | TCPOLEN_TIMESTAMP)) { + tp->rx_opt.saw_tstamp = 1; + ++ptr; + tp->rx_opt.rcv_tsval = ntohl(*ptr); + ++ptr; + tp->rx_opt.rcv_tsecr = ntohl(*ptr); + return 1; + } + return 0; +} + /* Fast parse options. This hopes to only see timestamps. * If it is wrong it falls back on tcp_parse_options(). */ @@ -3453,16 +3482,8 @@ static int tcp_fast_parse_options(struct sk_buff *skb, struct tcphdr *th, return 0; } else if (tp->rx_opt.tstamp_ok && th->doff == (sizeof(struct tcphdr)>>2)+(TCPOLEN_TSTAMP_ALIGNED>>2)) { - __be32 *ptr = (__be32 *)(th + 1); - if (*ptr == htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16) - | (TCPOPT_TIMESTAMP << 8) | TCPOLEN_TIMESTAMP)) { - tp->rx_opt.saw_tstamp = 1; - ++ptr; - tp->rx_opt.rcv_tsval = ntohl(*ptr); - ++ptr; - tp->rx_opt.rcv_tsecr = ntohl(*ptr); + if (tcp_parse_aligned_timestamp(tp, th)) return 1; - } } tcp_parse_options(skb, &tp->rx_opt, 1); return 1; @@ -4138,7 +4159,7 @@ drop: skb1 = skb1->prev; } } - __skb_insert(skb, skb1, skb1->next, &tp->out_of_order_queue); + __skb_queue_after(&tp->out_of_order_queue, skb1, skb); /* And clean segments covered by new one as whole. */ while ((skb1 = skb->next) != @@ -4161,6 +4182,18 @@ add_sack: } } +static struct sk_buff *tcp_collapse_one(struct sock *sk, struct sk_buff *skb, + struct sk_buff_head *list) +{ + struct sk_buff *next = skb->next; + + __skb_unlink(skb, list); + __kfree_skb(skb); + NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPRCVCOLLAPSED); + + return next; +} + /* Collapse contiguous sequence of skbs head..tail with * sequence numbers start..end. * Segments with FIN/SYN are not collapsed (only because this @@ -4178,11 +4211,7 @@ tcp_collapse(struct sock *sk, struct sk_buff_head *list, for (skb = head; skb != tail;) { /* No new bits? It is possible on ofo queue. */ if (!before(start, TCP_SKB_CB(skb)->end_seq)) { - struct sk_buff *next = skb->next; - __skb_unlink(skb, list); - __kfree_skb(skb); - NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPRCVCOLLAPSED); - skb = next; + skb = tcp_collapse_one(sk, skb, list); continue; } @@ -4228,7 +4257,7 @@ tcp_collapse(struct sock *sk, struct sk_buff_head *list, memcpy(nskb->head, skb->head, header); memcpy(nskb->cb, skb->cb, sizeof(skb->cb)); TCP_SKB_CB(nskb)->seq = TCP_SKB_CB(nskb)->end_seq = start; - __skb_insert(nskb, skb->prev, skb, list); + __skb_queue_before(list, skb, nskb); skb_set_owner_r(nskb, sk); /* Copy data, releasing collapsed skbs. */ @@ -4246,11 +4275,7 @@ tcp_collapse(struct sock *sk, struct sk_buff_head *list, start += size; } if (!before(start, TCP_SKB_CB(skb)->end_seq)) { - struct sk_buff *next = skb->next; - __skb_unlink(skb, list); - __kfree_skb(skb); - NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPRCVCOLLAPSED); - skb = next; + skb = tcp_collapse_one(sk, skb, list); if (skb == tail || tcp_hdr(skb)->syn || tcp_hdr(skb)->fin) @@ -4436,8 +4461,8 @@ static void tcp_new_space(struct sock *sk) if (tcp_should_expand_sndbuf(sk)) { int sndmem = max_t(u32, tp->rx_opt.mss_clamp, tp->mss_cache) + - MAX_TCP_HEADER + 16 + sizeof(struct sk_buff), - demanded = max_t(unsigned int, tp->snd_cwnd, + MAX_TCP_HEADER + 16 + sizeof(struct sk_buff); + int demanded = max_t(unsigned int, tp->snd_cwnd, tp->reordering + 1); sndmem *= 2 * demanded; if (sndmem > sk->sk_sndbuf) @@ -4691,6 +4716,67 @@ out: } #endif /* CONFIG_NET_DMA */ +/* Does PAWS and seqno based validation of an incoming segment, flags will + * play significant role here. + */ +static int tcp_validate_incoming(struct sock *sk, struct sk_buff *skb, + struct tcphdr *th, int syn_inerr) +{ + struct tcp_sock *tp = tcp_sk(sk); + + /* RFC1323: H1. Apply PAWS check first. */ + if (tcp_fast_parse_options(skb, th, tp) && tp->rx_opt.saw_tstamp && + tcp_paws_discard(sk, skb)) { + if (!th->rst) { + NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_PAWSESTABREJECTED); + tcp_send_dupack(sk, skb); + goto discard; + } + /* Reset is accepted even if it did not pass PAWS. */ + } + + /* Step 1: check sequence number */ + if (!tcp_sequence(tp, TCP_SKB_CB(skb)->seq, TCP_SKB_CB(skb)->end_seq)) { + /* RFC793, page 37: "In all states except SYN-SENT, all reset + * (RST) segments are validated by checking their SEQ-fields." + * And page 69: "If an incoming segment is not acceptable, + * an acknowledgment should be sent in reply (unless the RST + * bit is set, if so drop the segment and return)". + */ + if (!th->rst) + tcp_send_dupack(sk, skb); + goto discard; + } + + /* Step 2: check RST bit */ + if (th->rst) { + tcp_reset(sk); + goto discard; + } + + /* ts_recent update must be made after we are sure that the packet + * is in window. + */ + tcp_replace_ts_recent(tp, TCP_SKB_CB(skb)->seq); + + /* step 3: check security and precedence [ignored] */ + + /* step 4: Check for a SYN in window. */ + if (th->syn && !before(TCP_SKB_CB(skb)->seq, tp->rcv_nxt)) { + if (syn_inerr) + TCP_INC_STATS_BH(sock_net(sk), TCP_MIB_INERRS); + NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPABORTONSYN); + tcp_reset(sk); + return -1; + } + + return 1; + +discard: + __kfree_skb(skb); + return 0; +} + /* * TCP receive function for the ESTABLISHED state. * @@ -4718,6 +4804,7 @@ int tcp_rcv_established(struct sock *sk, struct sk_buff *skb, struct tcphdr *th, unsigned len) { struct tcp_sock *tp = tcp_sk(sk); + int res; /* * Header prediction. @@ -4756,19 +4843,10 @@ int tcp_rcv_established(struct sock *sk, struct sk_buff *skb, /* Check timestamp */ if (tcp_header_len == sizeof(struct tcphdr) + TCPOLEN_TSTAMP_ALIGNED) { - __be32 *ptr = (__be32 *)(th + 1); - /* No? Slow path! */ - if (*ptr != htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16) - | (TCPOPT_TIMESTAMP << 8) | TCPOLEN_TIMESTAMP)) + if (!tcp_parse_aligned_timestamp(tp, th)) goto slow_path; - tp->rx_opt.saw_tstamp = 1; - ++ptr; - tp->rx_opt.rcv_tsval = ntohl(*ptr); - ++ptr; - tp->rx_opt.rcv_tsecr = ntohl(*ptr); - /* If PAWS failed, check it more carefully in slow path */ if ((s32)(tp->rx_opt.rcv_tsval - tp->rx_opt.ts_recent) < 0) goto slow_path; @@ -4879,7 +4957,8 @@ int tcp_rcv_established(struct sock *sk, struct sk_buff *skb, goto no_ack; } - __tcp_ack_snd_check(sk, 0); + if (!copied_early || tp->rcv_nxt != tp->rcv_wup) + __tcp_ack_snd_check(sk, 0); no_ack: #ifdef CONFIG_NET_DMA if (copied_early) @@ -4899,51 +4978,12 @@ slow_path: goto csum_error; /* - * RFC1323: H1. Apply PAWS check first. - */ - if (tcp_fast_parse_options(skb, th, tp) && tp->rx_opt.saw_tstamp && - tcp_paws_discard(sk, skb)) { - if (!th->rst) { - NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_PAWSESTABREJECTED); - tcp_send_dupack(sk, skb); - goto discard; - } - /* Resets are accepted even if PAWS failed. - - ts_recent update must be made after we are sure - that the packet is in window. - */ - } - - /* * Standard slow path. */ - if (!tcp_sequence(tp, TCP_SKB_CB(skb)->seq, TCP_SKB_CB(skb)->end_seq)) { - /* RFC793, page 37: "In all states except SYN-SENT, all reset - * (RST) segments are validated by checking their SEQ-fields." - * And page 69: "If an incoming segment is not acceptable, - * an acknowledgment should be sent in reply (unless the RST bit - * is set, if so drop the segment and return)". - */ - if (!th->rst) - tcp_send_dupack(sk, skb); - goto discard; - } - - if (th->rst) { - tcp_reset(sk); - goto discard; - } - - tcp_replace_ts_recent(tp, TCP_SKB_CB(skb)->seq); - - if (th->syn && !before(TCP_SKB_CB(skb)->seq, tp->rcv_nxt)) { - TCP_INC_STATS_BH(sock_net(sk), TCP_MIB_INERRS); - NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPABORTONSYN); - tcp_reset(sk); - return 1; - } + res = tcp_validate_incoming(sk, skb, th, 1); + if (res <= 0) + return -res; step5: if (th->ack) @@ -5225,6 +5265,7 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb, struct tcp_sock *tp = tcp_sk(sk); struct inet_connection_sock *icsk = inet_csk(sk); int queued = 0; + int res; tp->rx_opt.saw_tstamp = 0; @@ -5277,42 +5318,9 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb, return 0; } - if (tcp_fast_parse_options(skb, th, tp) && tp->rx_opt.saw_tstamp && - tcp_paws_discard(sk, skb)) { - if (!th->rst) { - NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_PAWSESTABREJECTED); - tcp_send_dupack(sk, skb); - goto discard; - } - /* Reset is accepted even if it did not pass PAWS. */ - } - - /* step 1: check sequence number */ - if (!tcp_sequence(tp, TCP_SKB_CB(skb)->seq, TCP_SKB_CB(skb)->end_seq)) { - if (!th->rst) - tcp_send_dupack(sk, skb); - goto discard; - } - - /* step 2: check RST bit */ - if (th->rst) { - tcp_reset(sk); - goto discard; - } - - tcp_replace_ts_recent(tp, TCP_SKB_CB(skb)->seq); - - /* step 3: check security and precedence [ignored] */ - - /* step 4: - * - * Check for a SYN in window. - */ - if (th->syn && !before(TCP_SKB_CB(skb)->seq, tp->rcv_nxt)) { - NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPABORTONSYN); - tcp_reset(sk); - return 1; - } + res = tcp_validate_incoming(sk, skb, th, 0); + if (res <= 0) + return -res; /* step 5: check the ACK field */ if (th->ack) { diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 44c1e934824b..5c8fa7f1e327 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -583,14 +583,15 @@ static void tcp_v4_send_reset(struct sock *sk, struct sk_buff *skb) rep.th.doff = arg.iov[0].iov_len / 4; tcp_v4_md5_hash_hdr((__u8 *) &rep.opt[1], - key, ip_hdr(skb)->daddr, - ip_hdr(skb)->saddr, &rep.th); + key, ip_hdr(skb)->saddr, + ip_hdr(skb)->daddr, &rep.th); } #endif arg.csum = csum_tcpudp_nofold(ip_hdr(skb)->daddr, ip_hdr(skb)->saddr, /* XXX */ - sizeof(struct tcphdr), IPPROTO_TCP, 0); + arg.iov[0].iov_len, IPPROTO_TCP, 0); arg.csumoffset = offsetof(struct tcphdr, check) / 2; + arg.flags = (sk && inet_sk(sk)->transparent) ? IP_REPLY_ARG_NOSRCCHECK : 0; net = dev_net(skb->dst->dev); ip_send_reply(net->ipv4.tcp_sock, skb, @@ -606,7 +607,8 @@ static void tcp_v4_send_reset(struct sock *sk, struct sk_buff *skb) static void tcp_v4_send_ack(struct sk_buff *skb, u32 seq, u32 ack, u32 win, u32 ts, int oif, - struct tcp_md5sig_key *key) + struct tcp_md5sig_key *key, + int reply_flags) { struct tcphdr *th = tcp_hdr(skb); struct { @@ -618,7 +620,7 @@ static void tcp_v4_send_ack(struct sk_buff *skb, u32 seq, u32 ack, ]; } rep; struct ip_reply_arg arg; - struct net *net = dev_net(skb->dev); + struct net *net = dev_net(skb->dst->dev); memset(&rep.th, 0, sizeof(struct tcphdr)); memset(&arg, 0, sizeof(arg)); @@ -659,6 +661,7 @@ static void tcp_v4_send_ack(struct sk_buff *skb, u32 seq, u32 ack, ip_hdr(skb)->daddr, &rep.th); } #endif + arg.flags = reply_flags; arg.csum = csum_tcpudp_nofold(ip_hdr(skb)->daddr, ip_hdr(skb)->saddr, /* XXX */ arg.iov[0].iov_len, IPPROTO_TCP, 0); @@ -681,7 +684,8 @@ static void tcp_v4_timewait_ack(struct sock *sk, struct sk_buff *skb) tcptw->tw_rcv_wnd >> tw->tw_rcv_wscale, tcptw->tw_ts_recent, tw->tw_bound_dev_if, - tcp_twsk_md5_key(tcptw) + tcp_twsk_md5_key(tcptw), + tw->tw_transparent ? IP_REPLY_ARG_NOSRCCHECK : 0 ); inet_twsk_put(tw); @@ -694,7 +698,8 @@ static void tcp_v4_reqsk_send_ack(struct sock *sk, struct sk_buff *skb, tcp_rsk(req)->rcv_isn + 1, req->rcv_wnd, req->ts_recent, 0, - tcp_v4_md5_do_lookup(sk, ip_hdr(skb)->daddr)); + tcp_v4_md5_do_lookup(sk, ip_hdr(skb)->daddr), + inet_rsk(req)->no_srccheck ? IP_REPLY_ARG_NOSRCCHECK : 0); } /* @@ -1244,6 +1249,7 @@ int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb) ireq = inet_rsk(req); ireq->loc_addr = daddr; ireq->rmt_addr = saddr; + ireq->no_srccheck = inet_sk(sk)->transparent; ireq->opt = tcp_v4_save_options(sk, skb); if (!want_cookie) TCP_ECN_create_request(req, tcp_hdr(skb)); @@ -1364,6 +1370,10 @@ struct sock *tcp_v4_syn_recv_sock(struct sock *sk, struct sk_buff *skb, tcp_mtup_init(newsk); tcp_sync_mss(newsk, dst_mtu(dst)); newtp->advmss = dst_metric(dst, RTAX_ADVMSS); + if (tcp_sk(sk)->rx_opt.user_mss && + tcp_sk(sk)->rx_opt.user_mss < newtp->advmss) + newtp->advmss = tcp_sk(sk)->rx_opt.user_mss; + tcp_initialize_rcv_mss(newsk); #ifdef CONFIG_TCP_MD5SIG @@ -1567,8 +1577,7 @@ int tcp_v4_rcv(struct sk_buff *skb) TCP_SKB_CB(skb)->flags = iph->tos; TCP_SKB_CB(skb)->sacked = 0; - sk = __inet_lookup(net, &tcp_hashinfo, iph->saddr, - th->source, iph->daddr, th->dest, inet_iif(skb)); + sk = __inet_lookup_skb(&tcp_hashinfo, skb, th->source, th->dest); if (!sk) goto no_tcp_socket; @@ -1946,6 +1955,12 @@ static void *listening_get_idx(struct seq_file *seq, loff_t *pos) return rc; } +static inline int empty_bucket(struct tcp_iter_state *st) +{ + return hlist_empty(&tcp_hashinfo.ehash[st->bucket].chain) && + hlist_empty(&tcp_hashinfo.ehash[st->bucket].twchain); +} + static void *established_get_first(struct seq_file *seq) { struct tcp_iter_state* st = seq->private; @@ -1958,6 +1973,10 @@ static void *established_get_first(struct seq_file *seq) struct inet_timewait_sock *tw; rwlock_t *lock = inet_ehash_lockp(&tcp_hashinfo, st->bucket); + /* Lockless fast path for the common case of empty buckets */ + if (empty_bucket(st)) + continue; + read_lock_bh(lock); sk_for_each(sk, node, &tcp_hashinfo.ehash[st->bucket].chain) { if (sk->sk_family != st->family || @@ -2008,13 +2027,15 @@ get_tw: read_unlock_bh(inet_ehash_lockp(&tcp_hashinfo, st->bucket)); st->state = TCP_SEQ_STATE_ESTABLISHED; - if (++st->bucket < tcp_hashinfo.ehash_size) { - read_lock_bh(inet_ehash_lockp(&tcp_hashinfo, st->bucket)); - sk = sk_head(&tcp_hashinfo.ehash[st->bucket].chain); - } else { - cur = NULL; - goto out; - } + /* Look for next non empty bucket */ + while (++st->bucket < tcp_hashinfo.ehash_size && + empty_bucket(st)) + ; + if (st->bucket >= tcp_hashinfo.ehash_size) + return NULL; + + read_lock_bh(inet_ehash_lockp(&tcp_hashinfo, st->bucket)); + sk = sk_head(&tcp_hashinfo.ehash[st->bucket].chain); } else sk = sk_next(sk); @@ -2376,6 +2397,7 @@ static int __net_init tcp_sk_init(struct net *net) static void __net_exit tcp_sk_exit(struct net *net) { inet_ctl_sock_destroy(net->ipv4.tcp_sock); + inet_twsk_purge(net, &tcp_hashinfo, &tcp_death_row, AF_INET); } static struct pernet_operations __net_initdata tcp_sk_ops = { diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c index f976fc57892c..779f2e9d0689 100644 --- a/net/ipv4/tcp_minisocks.c +++ b/net/ipv4/tcp_minisocks.c @@ -395,6 +395,7 @@ struct sock *tcp_create_openreq_child(struct sock *sk, struct request_sock *req, newtp->pred_flags = 0; newtp->rcv_wup = newtp->copied_seq = newtp->rcv_nxt = treq->rcv_isn + 1; newtp->snd_sml = newtp->snd_una = newtp->snd_nxt = treq->snt_isn + 1; + newtp->snd_up = treq->snt_isn + 1; tcp_prequeue_init(newtp); diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index 8165f5aa8c71..990a58493235 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -345,6 +345,11 @@ static void tcp_init_nondata_skb(struct sk_buff *skb, u32 seq, u8 flags) TCP_SKB_CB(skb)->end_seq = seq; } +static inline int tcp_urg_mode(const struct tcp_sock *tp) +{ + return tp->snd_una != tp->snd_up; +} + #define OPTION_SACK_ADVERTISE (1 << 0) #define OPTION_TS (1 << 1) #define OPTION_MD5 (1 << 2) @@ -646,7 +651,8 @@ static int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, int clone_it, th->check = 0; th->urg_ptr = 0; - if (unlikely(tp->urg_mode && + /* The urg_mode check is necessary during a below snd_una win probe */ + if (unlikely(tcp_urg_mode(tp) && between(tp->snd_up, tcb->seq + 1, tcb->seq + 0xFFFF))) { th->urg_ptr = htons(tp->snd_up - tcb->seq); th->urg = 1; @@ -1012,7 +1018,7 @@ unsigned int tcp_sync_mss(struct sock *sk, u32 pmtu) /* Compute the current effective MSS, taking SACKs and IP options, * and even PMTU discovery events into account. * - * LARGESEND note: !urg_mode is overkill, only frames up to snd_up + * LARGESEND note: !tcp_urg_mode is overkill, only frames up to snd_up * cannot be large. However, taking into account rare use of URG, this * is not a big flaw. */ @@ -1029,7 +1035,7 @@ unsigned int tcp_current_mss(struct sock *sk, int large_allowed) mss_now = tp->mss_cache; - if (large_allowed && sk_can_gso(sk) && !tp->urg_mode) + if (large_allowed && sk_can_gso(sk) && !tcp_urg_mode(tp)) doing_tso = 1; if (dst) { @@ -1193,7 +1199,7 @@ static inline int tcp_nagle_test(struct tcp_sock *tp, struct sk_buff *skb, /* Don't use the nagle rule for urgent data (or for the final FIN). * Nagle can be ignored during F-RTO too (see RFC4138). */ - if (tp->urg_mode || (tp->frto_counter == 2) || + if (tcp_urg_mode(tp) || (tp->frto_counter == 2) || (TCP_SKB_CB(skb)->flags & TCPCB_FLAG_FIN)) return 1; @@ -1824,6 +1830,8 @@ static void tcp_retrans_try_collapse(struct sock *sk, struct sk_buff *skb, /* changed transmit queue under us so clear hints */ tcp_clear_retrans_hints_partial(tp); + if (next_skb == tp->retransmit_skb_hint) + tp->retransmit_skb_hint = skb; sk_wmem_free_skb(sk, next_skb); } @@ -1838,7 +1846,7 @@ void tcp_simple_retransmit(struct sock *sk) struct tcp_sock *tp = tcp_sk(sk); struct sk_buff *skb; unsigned int mss = tcp_current_mss(sk, 0); - int lost = 0; + u32 prior_lost = tp->lost_out; tcp_for_write_queue(skb, sk) { if (skb == tcp_send_head(sk)) @@ -1849,17 +1857,13 @@ void tcp_simple_retransmit(struct sock *sk) TCP_SKB_CB(skb)->sacked &= ~TCPCB_SACKED_RETRANS; tp->retrans_out -= tcp_skb_pcount(skb); } - if (!(TCP_SKB_CB(skb)->sacked & TCPCB_LOST)) { - TCP_SKB_CB(skb)->sacked |= TCPCB_LOST; - tp->lost_out += tcp_skb_pcount(skb); - lost = 1; - } + tcp_skb_mark_lost_uncond_verify(tp, skb); } } - tcp_clear_all_retrans_hints(tp); + tcp_clear_retrans_hints_partial(tp); - if (!lost) + if (prior_lost == tp->lost_out) return; if (tcp_is_reno(tp)) @@ -1934,8 +1938,8 @@ int tcp_retransmit_skb(struct sock *sk, struct sk_buff *skb) /* Collapse two adjacent packets if worthwhile and we can. */ if (!(TCP_SKB_CB(skb)->flags & TCPCB_FLAG_SYN) && (skb->len < (cur_mss >> 1)) && - (tcp_write_queue_next(sk, skb) != tcp_send_head(sk)) && (!tcp_skb_is_last(sk, skb)) && + (tcp_write_queue_next(sk, skb) != tcp_send_head(sk)) && (skb_shinfo(skb)->nr_frags == 0 && skb_shinfo(tcp_write_queue_next(sk, skb))->nr_frags == 0) && (tcp_skb_pcount(skb) == 1 && @@ -1996,86 +2000,18 @@ int tcp_retransmit_skb(struct sock *sk, struct sk_buff *skb) return err; } -/* This gets called after a retransmit timeout, and the initially - * retransmitted data is acknowledged. It tries to continue - * resending the rest of the retransmit queue, until either - * we've sent it all or the congestion window limit is reached. - * If doing SACK, the first ACK which comes back for a timeout - * based retransmit packet might feed us FACK information again. - * If so, we use it to avoid unnecessarily retransmissions. - */ -void tcp_xmit_retransmit_queue(struct sock *sk) +static int tcp_can_forward_retransmit(struct sock *sk) { const struct inet_connection_sock *icsk = inet_csk(sk); struct tcp_sock *tp = tcp_sk(sk); - struct sk_buff *skb; - int packet_cnt; - - if (tp->retransmit_skb_hint) { |