author     Linus Torvalds <torvalds@linux-foundation.org>  2014-10-11 21:19:00 -0400
committer  Linus Torvalds <torvalds@linux-foundation.org>  2014-10-11 21:19:00 -0400
commit     ca321885b0511a85e2d1cd40caafedbeb18f4af6 (patch)
tree       0042e8674aff7ae5785db467836d8d4101906f70 /net/core
parent     052db7ec86dff26f734031c3ef5c2c03a94af0af (diff)
parent     01d2d484e49e9bc0ed9b5fdaf345a0e2bf35ffed (diff)
Merge git://git.kernel.org/pub/scm/linux/kernel/git/davem/net
Pull networking fixes from David Miller:
"This set fixes a bunch of fallout from the changes that went in during
this merge window, particularly:
- Fix fsl_pq_mdio (Claudiu Manoil) and fm10k (Pranith Kumar) build
failures.
- Several networking drivers do atomic_set() on page counts where
that's not legal, since it races with concurrent
get_page_unless_zero() callers; a sketch of the race follows this
message. From Eric Dumazet.
- Make __skb_flow_get_ports() work cleanly with unaligned data, from
Alexander Duyck.
- Fix some kernel-doc buglets in rfkill and netlabel, from Fabian
Frederick.
- Unbalanced enable_irq_wake usage in bcmgenet and systemport
drivers, from Florian Fainelli.
- pxa168_eth needs to depend on HAS_DMA, from Geert Uytterhoeven.
- Multi-dequeue in the qdisc layer severely bypasses the fairness
limits the previous code enforced; reintroduce those limits in a way
that doesn't compromise the bulk dequeue opportunities.
From Jesper Dangaard Brouer.
- macvlan receive path unnecessarily hops through a softirq by using
netif_rx() instead of netif_receive_skb(). From Jason Baron"
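The page->_count items above are worth unpacking. get_page_unless_zero() takes a page reference only if the refcount is currently non-zero, so a writer that recycles a page with a plain atomic_set() can silently overwrite a reference a concurrent reader just took. Below is a minimal userspace sketch of that race, assuming only C11 atomics; the helper names are illustrative stand-ins, not the kernel's implementation (the real fix is in the net/core/skbuff.c hunk further down):

#include <stdatomic.h>
#include <stdbool.h>

static atomic_int page_count = 1;       /* stands in for page->_count */

/* Rough analogue of get_page_unless_zero(): take a reference only if
 * the count is currently non-zero, via a CAS loop.
 */
static bool try_get_page(void)
{
        int old = atomic_load(&page_count);

        do {
                if (old == 0)
                        return false;
        } while (!atomic_compare_exchange_weak(&page_count, &old, old + 1));
        return true;
}

/* Buggy recycle: a plain store discards whatever reference a concurrent
 * try_get_page() just added, so that reference is leaked or double-freed.
 */
static void recycle_page_buggy(int bias)
{
        atomic_store(&page_count, bias);
}

/* Fixed recycle, mirroring the skbuff.c change below: adding the bias
 * preserves concurrent increments instead of overwriting them.
 */
static void recycle_page_fixed(int bias)
{
        atomic_fetch_add(&page_count, bias - 1);
}

The skbuff.c hunk below also shows the slow path: only after atomic_sub_and_test() has proven the count dropped to zero, so no other user can hold a reference, does an atomic_set() become safe again.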
* git://git.kernel.org/pub/scm/linux/kernel/git/davem/net: (51 commits)
net: systemport: avoid unbalanced enable_irq_wake calls
net: bcmgenet: avoid unbalanced enable_irq_wake calls
net: bcmgenet: fix off-by-one in incrementing read pointer
net: fix races in page->_count manipulation
mlx4: fix race accessing page->_count
ixgbe: fix race accessing page->_count
igb: fix race accessing page->_count
fm10k: fix race accessing page->_count
net/phy: micrel: Add clock support for KSZ8021/KSZ8031
flow-dissector: Fix alignment issue in __skb_flow_get_ports
net: filter: fix the comments
Documentation: replace __sk_run_filter with __bpf_prog_run
macvlan: optimize the receive path
macvlan: pass 'bool' type to macvlan_count_rx()
drivers: net: xgene: Add 10GbE ethtool support
drivers: net: xgene: Add 10GbE support
drivers: net: xgene: Preparing for adding 10GbE support
dtb: Add 10GbE node to APM X-Gene SoC device tree
Documentation: dts: Update section header for APM X-Gene
MAINTAINERS: Update APM X-Gene section
...
Diffstat (limited to 'net/core')
-rw-r--r--  net/core/filter.c          |  9
-rw-r--r--  net/core/flow_dissector.c  | 36
-rw-r--r--  net/core/skbuff.c          | 35
3 files changed, 49 insertions(+), 31 deletions(-)
diff --git a/net/core/filter.c b/net/core/filter.c
index fcd3f6742a6a..647b12265e18 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -51,9 +51,9 @@
  * @skb: buffer to filter
  *
  * Run the filter code and then cut skb->data to correct size returned by
- * sk_run_filter. If pkt_len is 0 we toss packet. If skb->len is smaller
+ * SK_RUN_FILTER. If pkt_len is 0 we toss packet. If skb->len is smaller
  * than pkt_len we keep whole skb->data. This is the socket level
- * wrapper to sk_run_filter. It returns 0 if the packet should
+ * wrapper to SK_RUN_FILTER. It returns 0 if the packet should
  * be accepted or -EPERM if the packet should be tossed.
  *
  */
@@ -566,11 +566,8 @@ err:
 	/* Security:
 	 *
-	 * A BPF program is able to use 16 cells of memory to store intermediate
-	 * values (check u32 mem[BPF_MEMWORDS] in sk_run_filter()).
-	 *
 	 * As we dont want to clear mem[] array for each packet going through
-	 * sk_run_filter(), we check that filter loaded by user never try to read
+	 * __bpf_prog_run(), we check that filter loaded by user never try to read
 	 * a cell if not previously written, and we check all branches to be sure
 	 * a malicious user doesn't try to abuse us.
 	 */
diff --git a/net/core/flow_dissector.c b/net/core/flow_dissector.c
index 8560dea58803..45084938c403 100644
--- a/net/core/flow_dissector.c
+++ b/net/core/flow_dissector.c
@@ -100,6 +100,13 @@ ip:
 		if (ip_is_fragment(iph))
 			ip_proto = 0;
 
+		/* skip the address processing if skb is NULL. The assumption
+		 * here is that if there is no skb we are not looking for flow
+		 * info but lengths and protocols.
+		 */
+		if (!skb)
+			break;
+
 		iph_to_flow_copy_addrs(flow, iph);
 		break;
 	}
@@ -114,17 +121,15 @@ ipv6:
 			return false;
 
 		ip_proto = iph->nexthdr;
-		flow->src = (__force __be32)ipv6_addr_hash(&iph->saddr);
-		flow->dst = (__force __be32)ipv6_addr_hash(&iph->daddr);
 		nhoff += sizeof(struct ipv6hdr);
 
-		/* skip the flow label processing if skb is NULL. The
-		 * assumption here is that if there is no skb we are not
-		 * looking for flow info as much as we are length.
-		 */
+		/* see comment above in IPv4 section */
 		if (!skb)
 			break;
 
+		flow->src = (__force __be32)ipv6_addr_hash(&iph->saddr);
+		flow->dst = (__force __be32)ipv6_addr_hash(&iph->daddr);
+
 		flow_label = ip6_flowlabel(iph);
 		if (flow_label) {
 			/* Awesome, IPv6 packet has a flow label so we can
@@ -231,9 +236,13 @@ ipv6:
 	flow->n_proto = proto;
 	flow->ip_proto = ip_proto;
-	flow->ports = __skb_flow_get_ports(skb, nhoff, ip_proto, data, hlen);
 	flow->thoff = (u16) nhoff;
 
+	/* unless skb is set we don't need to record port info */
+	if (skb)
+		flow->ports = __skb_flow_get_ports(skb, nhoff, ip_proto,
+						   data, hlen);
+
 	return true;
 }
 EXPORT_SYMBOL(__skb_flow_dissect);
@@ -334,15 +343,16 @@ u32 __skb_get_poff(const struct sk_buff *skb, void *data,
 
 	switch (keys->ip_proto) {
 	case IPPROTO_TCP: {
-		const struct tcphdr *tcph;
-		struct tcphdr _tcph;
+		/* access doff as u8 to avoid unaligned access */
+		const u8 *doff;
+		u8 _doff;
 
-		tcph = __skb_header_pointer(skb, poff, sizeof(_tcph),
-					    data, hlen, &_tcph);
-		if (!tcph)
+		doff = __skb_header_pointer(skb, poff + 12, sizeof(_doff),
+					    data, hlen, &_doff);
+		if (!doff)
 			return poff;
 
-		poff += max_t(u32, sizeof(struct tcphdr), tcph->doff * 4);
+		poff += max_t(u32, sizeof(struct tcphdr), (*doff & 0xF0) >> 2);
 		break;
 	}
 	case IPPROTO_UDP:
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index 7b3df0d518ab..829d013745ab 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -360,18 +360,29 @@ refill:
 			goto end;
 		}
 		nc->frag.size = PAGE_SIZE << order;
-recycle:
-		atomic_set(&nc->frag.page->_count, NETDEV_PAGECNT_MAX_BIAS);
+		/* Even if we own the page, we do not use atomic_set().
+		 * This would break get_page_unless_zero() users.
+		 */
+		atomic_add(NETDEV_PAGECNT_MAX_BIAS - 1,
+			   &nc->frag.page->_count);
 		nc->pagecnt_bias = NETDEV_PAGECNT_MAX_BIAS;
 		nc->frag.offset = 0;
 	}
 
 	if (nc->frag.offset + fragsz > nc->frag.size) {
-		/* avoid unnecessary locked operations if possible */
-		if ((atomic_read(&nc->frag.page->_count) == nc->pagecnt_bias) ||
-		    atomic_sub_and_test(nc->pagecnt_bias, &nc->frag.page->_count))
-			goto recycle;
-		goto refill;
+		if (atomic_read(&nc->frag.page->_count) != nc->pagecnt_bias) {
+			if (!atomic_sub_and_test(nc->pagecnt_bias,
+						 &nc->frag.page->_count))
+				goto refill;
+			/* OK, page count is 0, we can safely set it */
+			atomic_set(&nc->frag.page->_count,
+				   NETDEV_PAGECNT_MAX_BIAS);
+		} else {
+			atomic_add(NETDEV_PAGECNT_MAX_BIAS - nc->pagecnt_bias,
+				   &nc->frag.page->_count);
+		}
+		nc->pagecnt_bias = NETDEV_PAGECNT_MAX_BIAS;
+		nc->frag.offset = 0;
 	}
 
 	data = page_address(nc->frag.page) + nc->frag.offset;
@@ -4126,11 +4137,11 @@ EXPORT_SYMBOL(skb_vlan_untag);
 /**
  * alloc_skb_with_frags - allocate skb with page frags
  *
- * header_len: size of linear part
- * data_len: needed length in frags
- * max_page_order: max page order desired.
- * errcode: pointer to error code if any
- * gfp_mask: allocation mask
+ * @header_len: size of linear part
+ * @data_len: needed length in frags
+ * @max_page_order: max page order desired.
+ * @errcode: pointer to error code if any
+ * @gfp_mask: allocation mask
  *
  * This can be used to allocate a paged skb, given a maximal order for frags.
  */
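A note on the __skb_get_poff() hunk above: TCP stores its data offset in the upper four bits of byte 12 of the header, and the header length is doff * 4, so loading that single byte and computing (byte & 0xF0) >> 2 yields the same length as tcph->doff * 4 without risking an unaligned multi-byte access. A self-contained check of that bit trick (hypothetical helper names, not kernel code):

#include <assert.h>
#include <stdint.h>
#include <stdio.h>

/* Field-based computation, as the old tcph->doff * 4 did. */
static uint32_t hdrlen_from_doff(uint8_t doff)
{
        return (uint32_t)doff * 4;
}

/* Byte-based computation, as the new (*doff & 0xF0) >> 2 does:
 * mask off the low nibble and shift down by 2 instead of shifting
 * down by 4 and multiplying by 4.
 */
static uint32_t hdrlen_from_byte(uint8_t byte12)
{
        return (byte12 & 0xF0) >> 2;
}

int main(void)
{
        /* doff is a 4-bit field; exercise every value, mixing in
         * arbitrary low nibbles (reserved/flag bits) to show they
         * don't affect the result.
         */
        for (unsigned doff = 0; doff < 16; doff++)
                for (unsigned low = 0; low < 16; low++) {
                        uint8_t byte12 = (uint8_t)((doff << 4) | low);

                        assert(hdrlen_from_byte(byte12) ==
                               hdrlen_from_doff((uint8_t)doff));
                }
        puts("byte-wise doff extraction matches doff * 4");
        return 0;
}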