From 0542b69e2c57fc9668ce6a03155bea6e1f557901 Mon Sep 17 00:00:00 2001 From: dpward Date: Wed, 31 Aug 2011 06:05:27 +0000 Subject: net: Make flow cache namespace-aware flow_cache_lookup will return a cached object (or null pointer) that the resolver (i.e. xfrm_policy_lookup) previously found for another namespace using the same key/family/dir. Instead, make the namespace part of what identifies entries in the cache. As before, flow_entry_valid will return 0 for entries where the namespace has been deleted, and they will be removed from the cache the next time flow_cache_gc_task is run. Reported-by: Andrew Dickinson Signed-off-by: David Ward Signed-off-by: David S. Miller --- net/core/flow.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'net/core') diff --git a/net/core/flow.c b/net/core/flow.c index bf32c33cad3b..47b6d26c2afb 100644 --- a/net/core/flow.c +++ b/net/core/flow.c @@ -30,6 +30,7 @@ struct flow_cache_entry { struct hlist_node hlist; struct list_head gc_list; } u; + struct net *net; u16 family; u8 dir; u32 genid; @@ -232,7 +233,8 @@ flow_cache_lookup(struct net *net, const struct flowi *key, u16 family, u8 dir, hash = flow_hash_code(fc, fcp, key); hlist_for_each_entry(tfle, entry, &fcp->hash_table[hash], u.hlist) { - if (tfle->family == family && + if (tfle->net == net && + tfle->family == family && tfle->dir == dir && flow_key_compare(key, &tfle->key) == 0) { fle = tfle; @@ -246,6 +248,7 @@ flow_cache_lookup(struct net *net, const struct flowi *key, u16 family, u8 dir, fle = kmem_cache_alloc(flow_cachep, GFP_ATOMIC); if (fle) { + fle->net = net; fle->family = family; fle->dir = dir; memcpy(&fle->key, key, sizeof(*key)); -- cgit v1.2.1 From 48c830120f2a20b44220aa26feda9ed15f49eaab Mon Sep 17 00:00:00 2001 From: "Michael S. Tsirkin" Date: Wed, 31 Aug 2011 08:03:29 +0000 Subject: net: copy userspace buffers on device forwarding dev_forward_skb loops an skb back into host networking stack which might hang on the memory indefinitely. In particular, this can happen in macvtap in bridged mode. Copy the userspace fragments to avoid blocking the sender in that case. As this patch makes skb_copy_ubufs extern now, I also added some documentation and made it clear the SKBTX_DEV_ZEROCOPY flag automatically instead of doing it in all callers. This can be made into a separate patch if people feel it's worth it. Signed-off-by: Michael S. Tsirkin Signed-off-by: David S. Miller --- net/core/dev.c | 8 ++++++++ net/core/skbuff.c | 22 +++++++++++++++++----- 2 files changed, 25 insertions(+), 5 deletions(-) (limited to 'net/core') diff --git a/net/core/dev.c b/net/core/dev.c index 17d67b579beb..b10ff0a71855 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -1515,6 +1515,14 @@ static inline bool is_skb_forwardable(struct net_device *dev, */ int dev_forward_skb(struct net_device *dev, struct sk_buff *skb) { + if (skb_shinfo(skb)->tx_flags & SKBTX_DEV_ZEROCOPY) { + if (skb_copy_ubufs(skb, GFP_ATOMIC)) { + atomic_long_inc(&dev->rx_dropped); + kfree_skb(skb); + return NET_RX_DROP; + } + } + skb_orphan(skb); nf_reset(skb); diff --git a/net/core/skbuff.c b/net/core/skbuff.c index 27002dffe7ed..387703f56fce 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -611,8 +611,21 @@ struct sk_buff *skb_morph(struct sk_buff *dst, struct sk_buff *src) } EXPORT_SYMBOL_GPL(skb_morph); -/* skb frags copy userspace buffers to kernel */ -static int skb_copy_ubufs(struct sk_buff *skb, gfp_t gfp_mask) +/* skb_copy_ubufs - copy userspace skb frags buffers to kernel + * @skb: the skb to modify + * @gfp_mask: allocation priority + * + * This must be called on SKBTX_DEV_ZEROCOPY skb. + * It will copy all frags into kernel and drop the reference + * to userspace pages. + * + * If this function is called from an interrupt gfp_mask() must be + * %GFP_ATOMIC. + * + * Returns 0 on success or a negative error code on failure + * to allocate kernel memory to copy to. + */ +int skb_copy_ubufs(struct sk_buff *skb, gfp_t gfp_mask) { int i; int num_frags = skb_shinfo(skb)->nr_frags; @@ -652,6 +665,8 @@ static int skb_copy_ubufs(struct sk_buff *skb, gfp_t gfp_mask) skb_shinfo(skb)->frags[i - 1].page = head; head = (struct page *)head->private; } + + skb_shinfo(skb)->tx_flags &= ~SKBTX_DEV_ZEROCOPY; return 0; } @@ -677,7 +692,6 @@ struct sk_buff *skb_clone(struct sk_buff *skb, gfp_t gfp_mask) if (skb_shinfo(skb)->tx_flags & SKBTX_DEV_ZEROCOPY) { if (skb_copy_ubufs(skb, gfp_mask)) return NULL; - skb_shinfo(skb)->tx_flags &= ~SKBTX_DEV_ZEROCOPY; } n = skb + 1; @@ -803,7 +817,6 @@ struct sk_buff *pskb_copy(struct sk_buff *skb, gfp_t gfp_mask) n = NULL; goto out; } - skb_shinfo(skb)->tx_flags &= ~SKBTX_DEV_ZEROCOPY; } for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) { skb_shinfo(n)->frags[i] = skb_shinfo(skb)->frags[i]; @@ -896,7 +909,6 @@ int pskb_expand_head(struct sk_buff *skb, int nhead, int ntail, if (skb_shinfo(skb)->tx_flags & SKBTX_DEV_ZEROCOPY) { if (skb_copy_ubufs(skb, gfp_mask)) goto nofrags; - skb_shinfo(skb)->tx_flags &= ~SKBTX_DEV_ZEROCOPY; } for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) get_page(skb_shinfo(skb)->frags[i].page); -- cgit v1.2.1 From aa1c366e4febc7f5c2b84958a2dd7cd70e28f9d0 Mon Sep 17 00:00:00 2001 From: dpward Date: Mon, 5 Sep 2011 16:47:24 +0000 Subject: net: Handle different key sizes between address families in flow cache With the conversion of struct flowi to a union of AF-specific structs, some operations on the flow cache need to account for the exact size of the key. Signed-off-by: David Ward Signed-off-by: David S. Miller --- net/core/flow.c | 31 +++++++++++++++++-------------- 1 file changed, 17 insertions(+), 14 deletions(-) (limited to 'net/core') diff --git a/net/core/flow.c b/net/core/flow.c index 47b6d26c2afb..555a456efb07 100644 --- a/net/core/flow.c +++ b/net/core/flow.c @@ -173,29 +173,26 @@ static void flow_new_hash_rnd(struct flow_cache *fc, static u32 flow_hash_code(struct flow_cache *fc, struct flow_cache_percpu *fcp, - const struct flowi *key) + const struct flowi *key, + size_t keysize) { const u32 *k = (const u32 *) key; + const u32 length = keysize * sizeof(flow_compare_t) / sizeof(u32); - return jhash2(k, (sizeof(*key) / sizeof(u32)), fcp->hash_rnd) + return jhash2(k, length, fcp->hash_rnd) & (flow_cache_hash_size(fc) - 1); } -typedef unsigned long flow_compare_t; - /* I hear what you're saying, use memcmp. But memcmp cannot make - * important assumptions that we can here, such as alignment and - * constant size. + * important assumptions that we can here, such as alignment. */ -static int flow_key_compare(const struct flowi *key1, const struct flowi *key2) +static int flow_key_compare(const struct flowi *key1, const struct flowi *key2, + size_t keysize) { const flow_compare_t *k1, *k1_lim, *k2; - const int n_elem = sizeof(struct flowi) / sizeof(flow_compare_t); - - BUILD_BUG_ON(sizeof(struct flowi) % sizeof(flow_compare_t)); k1 = (const flow_compare_t *) key1; - k1_lim = k1 + n_elem; + k1_lim = k1 + keysize; k2 = (const flow_compare_t *) key2; @@ -216,6 +213,7 @@ flow_cache_lookup(struct net *net, const struct flowi *key, u16 family, u8 dir, struct flow_cache_entry *fle, *tfle; struct hlist_node *entry; struct flow_cache_object *flo; + size_t keysize; unsigned int hash; local_bh_disable(); @@ -223,6 +221,11 @@ flow_cache_lookup(struct net *net, const struct flowi *key, u16 family, u8 dir, fle = NULL; flo = NULL; + + keysize = flow_key_size(family); + if (!keysize) + goto nocache; + /* Packet really early in init? Making flow_cache_init a * pre-smp initcall would solve this. --RR */ if (!fcp->hash_table) @@ -231,12 +234,12 @@ flow_cache_lookup(struct net *net, const struct flowi *key, u16 family, u8 dir, if (fcp->hash_rnd_recalc) flow_new_hash_rnd(fc, fcp); - hash = flow_hash_code(fc, fcp, key); + hash = flow_hash_code(fc, fcp, key, keysize); hlist_for_each_entry(tfle, entry, &fcp->hash_table[hash], u.hlist) { if (tfle->net == net && tfle->family == family && tfle->dir == dir && - flow_key_compare(key, &tfle->key) == 0) { + flow_key_compare(key, &tfle->key, keysize) == 0) { fle = tfle; break; } @@ -251,7 +254,7 @@ flow_cache_lookup(struct net *net, const struct flowi *key, u16 family, u8 dir, fle->net = net; fle->family = family; fle->dir = dir; - memcpy(&fle->key, key, sizeof(*key)); + memcpy(&fle->key, key, keysize * sizeof(flow_compare_t)); fle->object = NULL; hlist_add_head(&fle->u.hlist, &fcp->hash_table[hash]); fcp->hash_count++; -- cgit v1.2.1 From 561dac2d410ffac0b57a23b85ae0a623c1a076ca Mon Sep 17 00:00:00 2001 From: Gao feng Date: Sun, 11 Sep 2011 15:36:05 +0000 Subject: fib:fix BUG_ON in fib_nl_newrule when add new fib rule add new fib rule can cause BUG_ON happen the reproduce shell is ip rule add pref 38 ip rule add pref 38 ip rule add to 192.168.3.0/24 goto 38 ip rule del pref 38 ip rule add to 192.168.3.0/24 goto 38 ip rule add pref 38 then the BUG_ON will happen del BUG_ON and use (ctarget == NULL) identify whether this rule is unresolved Signed-off-by: Gao feng Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- net/core/fib_rules.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'net/core') diff --git a/net/core/fib_rules.c b/net/core/fib_rules.c index e7ab0c0285b5..3231b468bb72 100644 --- a/net/core/fib_rules.c +++ b/net/core/fib_rules.c @@ -384,8 +384,8 @@ static int fib_nl_newrule(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg) */ list_for_each_entry(r, &ops->rules_list, list) { if (r->action == FR_ACT_GOTO && - r->target == rule->pref) { - BUG_ON(rtnl_dereference(r->ctarget) != NULL); + r->target == rule->pref && + rtnl_dereference(r->ctarget) == NULL) { rcu_assign_pointer(r->ctarget, rule); if (--ops->unresolved_rules == 0) break; -- cgit v1.2.1 From afaef734e5f0004916d07ecf7d86292cdd00d59b Mon Sep 17 00:00:00 2001 From: "Yan, Zheng" Date: Mon, 17 Oct 2011 15:20:28 +0000 Subject: fib_rules: fix unresolved_rules counting we should decrease ops->unresolved_rules when deleting a unresolved rule. Signed-off-by: Zheng Yan Acked-by: Eric Dumazet Signed-off-by: David S. Miller --- net/core/fib_rules.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'net/core') diff --git a/net/core/fib_rules.c b/net/core/fib_rules.c index 3231b468bb72..27071ee2a4e1 100644 --- a/net/core/fib_rules.c +++ b/net/core/fib_rules.c @@ -475,8 +475,11 @@ static int fib_nl_delrule(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg) list_del_rcu(&rule->list); - if (rule->action == FR_ACT_GOTO) + if (rule->action == FR_ACT_GOTO) { ops->nr_goto_rules--; + if (rtnl_dereference(rule->ctarget) == NULL) + ops->unresolved_rules--; + } /* * Check if this rule is a target to any of them. If so, -- cgit v1.2.1