author     Linus Torvalds <torvalds@linux-foundation.org>  2014-10-11 21:19:00 -0400
committer  Linus Torvalds <torvalds@linux-foundation.org>  2014-10-11 21:19:00 -0400
commit     ca321885b0511a85e2d1cd40caafedbeb18f4af6 (patch)
tree       0042e8674aff7ae5785db467836d8d4101906f70 /net/core
parent     052db7ec86dff26f734031c3ef5c2c03a94af0af (diff)
parent     01d2d484e49e9bc0ed9b5fdaf345a0e2bf35ffed (diff)
Merge git://git.kernel.org/pub/scm/linux/kernel/git/davem/net
Pull networking fixes from David Miller:
"This set fixes a bunch of fallout from the changes that went in during
this merge window, particularly:
- Fix fsl_pq_mdio (Claudiu Manoil) and fm10k (Pranith Kumar) build
failures.
- Several networking drivers do atomic_set() on page counts where
that's not legal, since it races with concurrent
get_page_unless_zero() callers; a sketch of the race follows this
message. From Eric Dumazet.
- Make __skb_flow_get_ports() work cleanly with unaligned data, from
Alexander Duyck.
- Fix some kernel-doc buglets in rfkill and netlabel, from Fabian
Frederick.
- Unbalanced enable_irq_wake usage in bcmgenet and systemport
drivers, from Florian Fainelli.
- pxa168_eth needs to depend on HAS_DMA, from Geert Uytterhoeven.
- Multi-dequeue in the qdisc layer severely bypasses the fairness
limits the previous code enforced; reintroduce those limits in a way
that doesn't compromise the bulk dequeue opportunities.
From Jesper Dangaard Brouer.
- macvlan receive path unnecessarily hops through a softirq by using
netif_rx() instead of netif_receive_skb(). From Jason Baron"
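The page->_count items above are worth unpacking. get_page_unless_zero() takes a page reference only if the refcount is currently non-zero, so a writer that recycles a page with a plain atomic_set() can silently overwrite a reference a concurrent reader just took. Below is a minimal userspace sketch of that race, assuming only C11 atomics; the helper names are illustrative stand-ins, not the kernel's implementation (the real fix is in the net/core/skbuff.c hunk further down):

#include <stdatomic.h>
#include <stdbool.h>

static atomic_int page_count = 1;       /* stands in for page->_count */

/* Rough analogue of get_page_unless_zero(): take a reference only if
 * the count is currently non-zero, via a CAS loop.
 */
static bool try_get_page(void)
{
        int old = atomic_load(&page_count);

        do {
                if (old == 0)
                        return false;
        } while (!atomic_compare_exchange_weak(&page_count, &old, old + 1));
        return true;
}

/* Buggy recycle: a plain store discards whatever reference a concurrent
 * try_get_page() just added, so that reference is leaked or double-freed.
 */
static void recycle_page_buggy(int bias)
{
        atomic_store(&page_count, bias);
}

/* Fixed recycle, mirroring the skbuff.c change below: adding the bias
 * preserves concurrent increments instead of overwriting them.
 */
static void recycle_page_fixed(int bias)
{
        atomic_fetch_add(&page_count, bias - 1);
}

The skbuff.c hunk below also shows the slow path: only after atomic_sub_and_test() has proven the count dropped to zero, so no other user can hold a reference, does an atomic_set() become safe again.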
* git://git.kernel.org/pub/scm/linux/kernel/git/davem/net: (51 commits)
net: systemport: avoid unbalanced enable_irq_wake calls
net: bcmgenet: avoid unbalanced enable_irq_wake calls
net: bcmgenet: fix off-by-one in incrementing read pointer
net: fix races in page->_count manipulation
mlx4: fix race accessing page->_count
ixgbe: fix race accessing page->_count
igb: fix race accessing page->_count
fm10k: fix race accessing page->_count
net/phy: micrel: Add clock support for KSZ8021/KSZ8031
flow-dissector: Fix alignment issue in __skb_flow_get_ports
net: filter: fix the comments
Documentation: replace __sk_run_filter with __bpf_prog_run
macvlan: optimize the receive path
macvlan: pass 'bool' type to macvlan_count_rx()
drivers: net: xgene: Add 10GbE ethtool support
drivers: net: xgene: Add 10GbE support
drivers: net: xgene: Preparing for adding 10GbE support
dtb: Add 10GbE node to APM X-Gene SoC device tree
Documentation: dts: Update section header for APM X-Gene
MAINTAINERS: Update APM X-Gene section
...
Diffstat (limited to 'net/core')
-rw-r--r--  net/core/filter.c          |  9
-rw-r--r--  net/core/flow_dissector.c  | 36
-rw-r--r--  net/core/skbuff.c          | 35
3 files changed, 49 insertions(+), 31 deletions(-)
diff --git a/net/core/filter.c b/net/core/filter.c
index fcd3f6742a6a..647b12265e18 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -51,9 +51,9 @@
  * @skb: buffer to filter
  *
  * Run the filter code and then cut skb->data to correct size returned by
- * sk_run_filter. If pkt_len is 0 we toss packet. If skb->len is smaller
+ * SK_RUN_FILTER. If pkt_len is 0 we toss packet. If skb->len is smaller
  * than pkt_len we keep whole skb->data. This is the socket level
- * wrapper to sk_run_filter. It returns 0 if the packet should
+ * wrapper to SK_RUN_FILTER. It returns 0 if the packet should
  * be accepted or -EPERM if the packet should be tossed.
  *
  */
@@ -566,11 +566,8 @@ err:
 	/* Security:
 	 *
-	 * A BPF program is able to use 16 cells of memory to store intermediate
-	 * values (check u32 mem[BPF_MEMWORDS] in sk_run_filter()).
-	 *
 	 * As we dont want to clear mem[] array for each packet going through
-	 * sk_run_filter(), we check that filter loaded by user never try to read
+	 * __bpf_prog_run(), we check that filter loaded by user never try to read
 	 * a cell if not previously written, and we check all branches to be sure
 	 * a malicious user doesn't try to abuse us.
 	 */
diff --git a/net/core/flow_dissector.c b/net/core/flow_dissector.c
index 8560dea58803..45084938c403 100644
--- a/net/core/flow_dissector.c
+++ b/net/core/flow_dissector.c
@@ -100,6 +100,13 @@ ip:
 		if (ip_is_fragment(iph))
 			ip_proto = 0;
 
+		/* skip the address processing if skb is NULL. The assumption
+		 * here is that if there is no skb we are not looking for flow
+		 * info but lengths and protocols.
+		 */
+		if (!skb)
+			break;
+
 		iph_to_flow_copy_addrs(flow, iph);
 		break;
 	}
@@ -114,17 +121,15 @@ ipv6:
 			return false;
 
 		ip_proto = iph->nexthdr;
-		flow->src = (__force __be32)ipv6_addr_hash(&iph->saddr);
-		flow->dst = (__force __be32)ipv6_addr_hash(&iph->daddr);
 		nhoff += sizeof(struct ipv6hdr);
 
-		/* skip the flow label processing if skb is NULL. The
-		 * assumption here is that if there is no skb we are not
-		 * looking for flow info as much as we are length.
-		 */
+		/* see comment above in IPv4 section */
 		if (!skb)
 			break;
 
+		flow->src = (__force __be32)ipv6_addr_hash(&iph->saddr);
+		flow->dst = (__force __be32)ipv6_addr_hash(&iph->daddr);
+
 		flow_label = ip6_flowlabel(iph);
 		if (flow_label) {
 			/* Awesome, IPv6 packet has a flow label so we can
@@ -231,9 +236,13 @@ ipv6:
 	flow->n_proto = proto;
 	flow->ip_proto = ip_proto;
-	flow->ports = __skb_flow_get_ports(skb, nhoff, ip_proto, data, hlen);
 	flow->thoff = (u16) nhoff;
 
+	/* unless skb is set we don't need to record port info */
+	if (skb)
+		flow->ports = __skb_flow_get_ports(skb, nhoff, ip_proto,
+						   data, hlen);
+
 	return true;
 }
 EXPORT_SYMBOL(__skb_flow_dissect);
@@ -334,15 +343,16 @@ u32 __skb_get_poff(const struct sk_buff *skb, void *data,
 
 	switch (keys->ip_proto) {
 	case IPPROTO_TCP: {
-		const struct tcphdr *tcph;
-		struct tcphdr _tcph;
+		/* access doff as u8 to avoid unaligned access */
+		const u8 *doff;
+		u8 _doff;
 
-		tcph = __skb_header_pointer(skb, poff, sizeof(_tcph),
-					    data, hlen, &_tcph);
-		if (!tcph)
+		doff = __skb_header_pointer(skb, poff + 12, sizeof(_doff),
+					    data, hlen, &_doff);
+		if (!doff)
 			return poff;
 
-		poff += max_t(u32, sizeof(struct tcphdr), tcph->doff * 4);
+		poff += max_t(u32, sizeof(struct tcphdr), (*doff & 0xF0) >> 2);
 		break;
 	}
 	case IPPROTO_UDP:
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index 7b3df0d518ab..829d013745ab 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -360,18 +360,29 @@ refill:
 			goto end;
 		}
 		nc->frag.size = PAGE_SIZE << order;
-recycle:
-		atomic_set(&nc->frag.page->_count, NETDEV_PAGECNT_MAX_BIAS);
+		/* Even if we own the page, we do not use atomic_set().
+		 * This would break get_page_unless_zero() users.
+		 */
+		atomic_add(NETDEV_PAGECNT_MAX_BIAS - 1,
+			   &nc->frag.page->_count);
 		nc->pagecnt_bias = NETDEV_PAGECNT_MAX_BIAS;
 		nc->frag.offset = 0;
 	}
 
 	if (nc->frag.offset + fragsz > nc->frag.size) {
-		/* avoid unnecessary locked operations if possible */
-		if ((atomic_read(&nc->frag.page->_count) == nc->pagecnt_bias) ||
-		    atomic_sub_and_test(nc->pagecnt_bias, &nc->frag.page->_count))
-			goto recycle;
-		goto refill;
+		if (atomic_read(&nc->frag.page->_count) != nc->pagecnt_bias) {
+			if (!atomic_sub_and_test(nc->pagecnt_bias,
+						 &nc->frag.page->_count))
+				goto refill;
+			/* OK, page count is 0, we can safely set it */
+			atomic_set(&nc->frag.page->_count,
+				   NETDEV_PAGECNT_MAX_BIAS);
+		} else {
+			atomic_add(NETDEV_PAGECNT_MAX_BIAS - nc->pagecnt_bias,
+				   &nc->frag.page->_count);
+		}
+		nc->pagecnt_bias = NETDEV_PAGECNT_MAX_BIAS;
+		nc->frag.offset = 0;
 	}
 
 	data = page_address(nc->frag.page) + nc->frag.offset;
@@ -4126,11 +4137,11 @@ EXPORT_SYMBOL(skb_vlan_untag);
 /**
  * alloc_skb_with_frags - allocate skb with page frags
  *
- * header_len: size of linear part
- * data_len: needed length in frags
- * max_page_order: max page order desired.
- * errcode: pointer to error code if any
- * gfp_mask: allocation mask
+ * @header_len: size of linear part
+ * @data_len: needed length in frags
+ * @max_page_order: max page order desired.
+ * @errcode: pointer to error code if any
+ * @gfp_mask: allocation mask
  *
  * This can be used to allocate a paged skb, given a maximal order for frags.
  */
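A note on the __skb_get_poff() hunk above: TCP stores its data offset in the upper four bits of byte 12 of the header, and the header length is doff * 4, so loading that single byte and computing (byte & 0xF0) >> 2 yields the same length as tcph->doff * 4 without risking an unaligned multi-byte access. A self-contained check of that bit trick (hypothetical helper names, not kernel code):

#include <assert.h>
#include <stdint.h>
#include <stdio.h>

/* Field-based computation, as the old tcph->doff * 4 did. */
static uint32_t hdrlen_from_doff(uint8_t doff)
{
        return (uint32_t)doff * 4;
}

/* Byte-based computation, as the new (*doff & 0xF0) >> 2 does:
 * mask off the low nibble and shift down by 2 instead of shifting
 * down by 4 and multiplying by 4.
 */
static uint32_t hdrlen_from_byte(uint8_t byte12)
{
        return (byte12 & 0xF0) >> 2;
}

int main(void)
{
        /* doff is a 4-bit field; exercise every value, mixing in
         * arbitrary low nibbles (reserved/flag bits) to show they
         * don't affect the result.
         */
        for (unsigned doff = 0; doff < 16; doff++)
                for (unsigned low = 0; low < 16; low++) {
                        uint8_t byte12 = (uint8_t)((doff << 4) | low);

                        assert(hdrlen_from_byte(byte12) ==
                               hdrlen_from_doff((uint8_t)doff));
                }
        puts("byte-wise doff extraction matches doff * 4");
        return 0;
}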