Diffstat (limited to 'net/core/skbuff.c')
-rw-r--r--  net/core/skbuff.c | 148
1 file changed, 111 insertions(+), 37 deletions(-)
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index a298992060e6..faa6c86da2a5 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -73,7 +73,7 @@
 #include <net/mpls.h>
 #include <net/mptcp.h>
 #include <net/mctp.h>
-#include <net/page_pool.h>
+#include <net/page_pool/helpers.h>
 #include <net/dropreason.h>
 
 #include <linux/uaccess.h>
@@ -879,11 +879,56 @@ static void skb_clone_fraglist(struct sk_buff *skb)
 		skb_get(list);
 }
 
+#if IS_ENABLED(CONFIG_PAGE_POOL)
+bool napi_pp_put_page(struct page *page, bool napi_safe)
+{
+	bool allow_direct = false;
+	struct page_pool *pp;
+
+	page = compound_head(page);
+
+	/* page->pp_magic is OR'ed with PP_SIGNATURE after the allocation
+	 * in order to preserve any existing bits, such as bit 0 for the
+	 * head page of compound page and bit 1 for pfmemalloc page, so
+	 * mask those bits for freeing side when doing below checking,
+	 * and page_is_pfmemalloc() is checked in __page_pool_put_page()
+	 * to avoid recycling the pfmemalloc page.
+	 */
+	if (unlikely((page->pp_magic & ~0x3UL) != PP_SIGNATURE))
+		return false;
+
+	pp = page->pp;
+
+	/* Allow direct recycle if we have reasons to believe that we are
+	 * in the same context as the consumer would run, so there's
+	 * no possible race.
+	 * __page_pool_put_page() makes sure we're not in hardirq context
+	 * and interrupts are enabled prior to accessing the cache.
+	 */
+	if (napi_safe || in_softirq()) {
+		const struct napi_struct *napi = READ_ONCE(pp->p.napi);
+
+		allow_direct = napi &&
+			READ_ONCE(napi->list_owner) == smp_processor_id();
+	}
+
+	/* Driver set this to memory recycling info. Reset it on recycle.
+	 * This will *not* work for NIC using a split-page memory model.
+	 * The page will be returned to the pool here regardless of the
+	 * 'flipped' fragment being in use or not.
+	 */
+	page_pool_put_full_page(pp, page, allow_direct);
+
+	return true;
+}
+EXPORT_SYMBOL(napi_pp_put_page);
+#endif
+
 static bool skb_pp_recycle(struct sk_buff *skb, void *data, bool napi_safe)
 {
 	if (!IS_ENABLED(CONFIG_PAGE_POOL) || !skb->pp_recycle)
 		return false;
-	return page_pool_return_skb_page(virt_to_page(data), napi_safe);
+	return napi_pp_put_page(virt_to_page(data), napi_safe);
 }
 
 static void skb_kfree_head(void *head, unsigned int end_offset)
@@ -3656,20 +3701,23 @@ struct sk_buff *skb_dequeue_tail(struct sk_buff_head *list)
 EXPORT_SYMBOL(skb_dequeue_tail);
 
 /**
- *	skb_queue_purge - empty a list
+ *	skb_queue_purge_reason - empty a list
  *	@list: list to empty
+ *	@reason: drop reason
  *
  *	Delete all buffers on an &sk_buff list. Each buffer is removed from
  *	the list and one reference dropped. This function takes the list
  *	lock and is atomic with respect to other list locking functions.
  */
-void skb_queue_purge(struct sk_buff_head *list)
+void skb_queue_purge_reason(struct sk_buff_head *list,
+			    enum skb_drop_reason reason)
 {
 	struct sk_buff *skb;
+
 	while ((skb = skb_dequeue(list)) != NULL)
-		kfree_skb(skb);
+		kfree_skb_reason(skb, reason);
 }
-EXPORT_SYMBOL(skb_queue_purge);
+EXPORT_SYMBOL(skb_queue_purge_reason);
 
 /**
  *	skb_rbtree_purge - empty a skb rbtree
@@ -3697,6 +3745,27 @@ unsigned int skb_rbtree_purge(struct rb_root *root)
 	return sum;
 }
 
+void skb_errqueue_purge(struct sk_buff_head *list)
+{
+	struct sk_buff *skb, *next;
+	struct sk_buff_head kill;
+	unsigned long flags;
+
+	__skb_queue_head_init(&kill);
+
+	spin_lock_irqsave(&list->lock, flags);
+	skb_queue_walk_safe(list, skb, next) {
+		if (SKB_EXT_ERR(skb)->ee.ee_origin == SO_EE_ORIGIN_ZEROCOPY ||
+		    SKB_EXT_ERR(skb)->ee.ee_origin == SO_EE_ORIGIN_TIMESTAMPING)
+			continue;
+		__skb_unlink(skb, list);
+		__skb_queue_tail(&kill, skb);
+	}
+	spin_unlock_irqrestore(&list->lock, flags);
+	__skb_queue_purge(&kill);
+}
+EXPORT_SYMBOL(skb_errqueue_purge);
+
 /**
  *	skb_queue_head - queue a buffer at the list head
  *	@list: list to use
@@ -4750,12 +4819,23 @@ static void skb_extensions_init(void)
 static void skb_extensions_init(void) {}
 #endif
 
+/* The SKB kmem_cache slab is critical for network performance.  Never
+ * merge/alias the slab with similar sized objects.  This avoids fragmentation
+ * that hurts performance of kmem_cache_{alloc,free}_bulk APIs.
+ */
+#ifndef CONFIG_SLUB_TINY
+#define FLAG_SKB_NO_MERGE	SLAB_NO_MERGE
+#else /* CONFIG_SLUB_TINY - simple loop in kmem_cache_alloc_bulk */
+#define FLAG_SKB_NO_MERGE	0
+#endif
+
 void __init skb_init(void)
 {
 	skbuff_cache = kmem_cache_create_usercopy("skbuff_head_cache",
 					      sizeof(struct sk_buff),
 					      0,
-					      SLAB_HWCACHE_ALIGN|SLAB_PANIC,
+					      SLAB_HWCACHE_ALIGN|SLAB_PANIC|
+						FLAG_SKB_NO_MERGE,
 					      offsetof(struct sk_buff, cb),
 					      sizeof_field(struct sk_buff, cb),
 					      NULL);
@@ -6204,7 +6284,7 @@ EXPORT_SYMBOL_GPL(skb_mpls_dec_ttl);
  *
  * @header_len: size of linear part
  * @data_len: needed length in frags
- * @max_page_order: max page order desired.
+ * @order: max page order desired.
  * @errcode: pointer to error code if any
  * @gfp_mask: allocation mask
  *
@@ -6212,21 +6292,17 @@ EXPORT_SYMBOL_GPL(skb_mpls_dec_ttl);
  */
 struct sk_buff *alloc_skb_with_frags(unsigned long header_len,
 				     unsigned long data_len,
-				     int max_page_order,
+				     int order,
 				     int *errcode,
 				     gfp_t gfp_mask)
 {
-	int npages = (data_len + (PAGE_SIZE - 1)) >> PAGE_SHIFT;
 	unsigned long chunk;
 	struct sk_buff *skb;
 	struct page *page;
-	int i;
+	int nr_frags = 0;
 
 	*errcode = -EMSGSIZE;
-	/* Note this test could be relaxed, if we succeed to allocate
-	 * high order pages...
-	 */
-	if (npages > MAX_SKB_FRAGS)
+	if (unlikely(data_len > MAX_SKB_FRAGS * (PAGE_SIZE << order)))
 		return NULL;
 
 	*errcode = -ENOBUFS;
@@ -6234,34 +6310,32 @@ struct sk_buff *alloc_skb_with_frags(unsigned long header_len,
 	if (!skb)
 		return NULL;
 
-	skb->truesize += npages << PAGE_SHIFT;
-
-	for (i = 0; npages > 0; i++) {
-		int order = max_page_order;
+	while (data_len) {
+		if (nr_frags == MAX_SKB_FRAGS - 1)
+			goto failure;
+		while (order && PAGE_ALIGN(data_len) < (PAGE_SIZE << order))
+			order--;
 
-		while (order) {
-			if (npages >= 1 << order) {
-				page = alloc_pages((gfp_mask & ~__GFP_DIRECT_RECLAIM) |
-						   __GFP_COMP |
-						   __GFP_NOWARN,
-						   order);
-				if (page)
-					goto fill_page;
-				/* Do not retry other high order allocations */
-				order = 1;
-				max_page_order = 0;
+		if (order) {
+			page = alloc_pages((gfp_mask & ~__GFP_DIRECT_RECLAIM) |
+					   __GFP_COMP |
+					   __GFP_NOWARN,
+					   order);
+			if (!page) {
+				order--;
+				continue;
 			}
-			order--;
+		} else {
+			page = alloc_page(gfp_mask);
+			if (!page)
+				goto failure;
 		}
-		page = alloc_page(gfp_mask);
-		if (!page)
-			goto failure;
-fill_page:
 		chunk = min_t(unsigned long, data_len,
 			      PAGE_SIZE << order);
-		skb_fill_page_desc(skb, i, page, 0, chunk);
+		skb_fill_page_desc(skb, nr_frags, page, 0, chunk);
+		nr_frags++;
+		skb->truesize += (PAGE_SIZE << order);
 		data_len -= chunk;
-		npages -= 1 << order;
 	}
 	return skb;
 
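Note (not part of the patch): the two purge helpers exported above can be exercised by socket teardown code. A minimal sketch of a caller is shown below; the function name example_flush_socket_queues is hypothetical, and SKB_DROP_REASON_NOT_SPECIFIED is used only as a stand-in where a real call site would pass a more specific enum skb_drop_reason value.

#include <linux/skbuff.h>
#include <net/sock.h>

/* Illustrative sketch only: flush a socket's queues with the helpers
 * introduced in the diff above.
 */
static void example_flush_socket_queues(struct sock *sk)
{
	/* Drop every queued receive skb, attributing the drops to a reason.
	 * SKB_DROP_REASON_NOT_SPECIFIED is a placeholder here.
	 */
	skb_queue_purge_reason(&sk->sk_receive_queue,
			       SKB_DROP_REASON_NOT_SPECIFIED);

	/* Purge the error queue, except zerocopy and timestamping
	 * notifications, which skb_errqueue_purge() deliberately leaves
	 * on the list for user space to read.
	 */
	skb_errqueue_purge(&sk->sk_error_queue);
}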

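Similarly, a hedged sketch of how a consumer might rely on the newly exported napi_pp_put_page() when releasing a page that may or may not be page_pool-backed. The wrapper name my_put_rx_page is hypothetical, and the header carrying the declaration is an assumption.

#include <linux/mm.h>
#include <linux/skbuff.h>	/* assumed to provide the napi_pp_put_page() declaration */

/* Hypothetical helper: if the page carries PP_SIGNATURE, napi_pp_put_page()
 * returns it to its page_pool (possibly via the NAPI direct cache) and
 * returns true; otherwise fall back to a plain refcount release.
 */
static void my_put_rx_page(struct page *page, bool napi_safe)
{
	if (napi_pp_put_page(page, napi_safe))
		return;
	put_page(page);
}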