author     Mina Almasry <almasrymina@google.com>   2024-05-02 10:54:22 -0700
committer  Jakub Kicinski <kuba@kernel.org>        2024-05-03 16:05:53 -0700
commit     173e7622ccb3f46834bd4176ed363f435e142942 (patch)
tree       fd39a0842d666f9f90ba0740223613d16e1d08fa /net
parent     5bfadc573711a1e68d05d25e10ae747385c4c253 (diff)
Revert "net: mirror skb frag ref/unref helpers"
This reverts commit a580ea994fd37f4105028f5a85c38ff6508a2b25.

This revert resolves Dragos's report of a page_pool leak here:
https://lore.kernel.org/lkml/20240424165646.1625690-2-dtatulea@nvidia.com/

The reverted patch interacts very badly with commit 2cc3aeb5eccc ("skbuff: Fix a potential race while recycling page_pool packets").

The reverted commit assumes that the pp_recycle + is_pp_page variables do not change between the skb_frag_ref and skb_frag_unref operations. If such a change occurs, skb_frag_ref/unref will not operate on the same reference type. In the case of Dragos's report, the grabbed ref was a pp ref, but the unref was a page ref, because the pp_recycle setting on the skb had been changed in between.

Attempting to fix this issue on the fly is risky. Let's revert, and I hope to reland this with better understanding and testing to ensure we don't regress some edge case while streamlining skb reffing.

Fixes: a580ea994fd3 ("net: mirror skb frag ref/unref helpers")
Reported-by: Dragos Tatulea <dtatulea@nvidia.com>
Signed-off-by: Mina Almasry <almasrymina@google.com>
Link: https://lore.kernel.org/r/20240502175423.2456544-1-almasrymina@google.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
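To make the failure mode concrete, here is a minimal sketch of the sequence described above, written against the two-argument helpers from the reverted series. It is an illustration only, not code from either patch:

/* Illustration only: under the reverted series, the reference type was
 * chosen from skb->pp_recycle independently at ref time and unref time,
 * so the two calls could disagree.
 */
static void example_type_mismatch(struct sk_buff *skb, skb_frag_t *frag)
{
	/* pp_recycle is true here: a page_pool (pp_ref_count)
	 * reference is taken on the fragment's page. */
	__skb_frag_ref(frag, skb->pp_recycle);

	/* Per the commit message, the interaction with commit
	 * 2cc3aeb5eccc can change the skb's pp_recycle setting in
	 * the meantime... */
	skb->pp_recycle = 0;

	/* ...so the matching unref drops a plain page reference
	 * instead: the pp_ref_count taken above is never released
	 * and the page_pool page leaks, as in Dragos's report. */
	__skb_frag_unref(frag, skb->pp_recycle);
}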
Diffstat (limited to 'net')
-rw-r--r--  net/core/skbuff.c              46
-rw-r--r--  net/tls/tls_device_fallback.c   2
2 files changed, 43 insertions, 5 deletions
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index 28cd640a6ea9..466999a7515e 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -904,6 +904,11 @@ static void skb_clone_fraglist(struct sk_buff *skb)
 		skb_get(list);
 }
 
+static bool is_pp_page(struct page *page)
+{
+	return (page->pp_magic & ~0x3UL) == PP_SIGNATURE;
+}
+
 int skb_pp_cow_data(struct page_pool *pool, struct sk_buff **pskb,
 		    unsigned int headroom)
 {
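A gloss on why the signature check comes back here (my reading, not from the patch): page_pool stamps PP_SIGNATURE into page->pp_magic when it hands out a page and clears it when the page leaves the pool, so is_pp_page() can classify any frag page on sight; the ~0x3UL mask appears to tolerate low tag bits in that word. The point is that both halves of the lifecycle key off page state rather than skb state:

/* Sketch of the invariant (illustration only):
 *
 *   ref side   (skb_pp_frag_ref, next hunk):
 *       is_pp_page(page) ? page_pool_ref_page(page) : page_ref_inc(page);
 *   unref side (napi_pp_put_page, reached via skb_pp_recycle below):
 *       performs the same pp_magic test before recycling.
 *
 * Both sides consult the page, not the skb, so a fragment is released
 * with the same reference type it was taken with.
 */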
@@ -1025,6 +1030,37 @@ static bool skb_pp_recycle(struct sk_buff *skb, void *data)
 	return napi_pp_put_page(virt_to_page(data));
 }
 
+/**
+ * skb_pp_frag_ref() - Increase fragment references of a page pool aware skb
+ * @skb: page pool aware skb
+ *
+ * Increase the fragment reference count (pp_ref_count) of a skb. This is
+ * intended to gain fragment references only for page pool aware skbs,
+ * i.e. when skb->pp_recycle is true, and not for fragments in a
+ * non-pp-recycling skb. It has a fallback to increase references on normal
+ * pages, as page pool aware skbs may also have normal page fragments.
+ */
+static int skb_pp_frag_ref(struct sk_buff *skb)
+{
+	struct skb_shared_info *shinfo;
+	struct page *head_page;
+	int i;
+
+	if (!skb->pp_recycle)
+		return -EINVAL;
+
+	shinfo = skb_shinfo(skb);
+
+	for (i = 0; i < shinfo->nr_frags; i++) {
+		head_page = compound_head(skb_frag_page(&shinfo->frags[i]));
+		if (likely(is_pp_page(head_page)))
+			page_pool_ref_page(head_page);
+		else
+			page_ref_inc(head_page);
+	}
+	return 0;
+}
+
 static void skb_kfree_head(void *head, unsigned int end_offset)
 {
 	if (end_offset == SKB_SMALL_HEAD_HEADROOM)
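The -EINVAL return is the opt-out for non-pp skbs: callers are expected to fall back to plain page references per fragment, exactly as the skb_try_coalesce() hunk below does. A distilled sketch of the calling pattern:

/* Calling pattern (mirrors the skb_try_coalesce() change below):
 * try the pp-aware bulk ref first; if the skb is not pp-aware
 * (-EINVAL), take an ordinary page reference on each fragment.
 */
if (skb_pp_frag_ref(skb)) {
	int i;

	for (i = 0; i < skb_shinfo(skb)->nr_frags; i++)
		__skb_frag_ref(&skb_shinfo(skb)->frags[i]);
}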
@@ -4160,7 +4196,7 @@ int skb_shift(struct sk_buff *tgt, struct sk_buff *skb, int shiftlen)
 			to++;
 		} else {
-			__skb_frag_ref(fragfrom, skb->pp_recycle);
+			__skb_frag_ref(fragfrom);
 			skb_frag_page_copy(fragto, fragfrom);
 			skb_frag_off_copy(fragto, fragfrom);
 			skb_frag_size_set(fragto, todo);
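The helper's own definition is not visible here because this view is limited to 'net' and __skb_frag_ref() lives in include/linux/skbuff.h. After the revert it should again be the plain, pp-oblivious helper; from memory, so treat this as a sketch:

/* Restored helper, sketched from memory of include/linux/skbuff.h:
 * a bare page reference on the fragment's page, with no pp_recycle
 * input; page_pool awareness stays in skb_pp_frag_ref() above.
 */
static inline void __skb_frag_ref(skb_frag_t *frag)
{
	get_page(skb_frag_page(frag));
}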
@@ -4810,7 +4846,7 @@ normal:
 		}
 
 		*nskb_frag = (i < 0) ? skb_head_frag_to_page_desc(frag_skb) : *frag;
-		__skb_frag_ref(nskb_frag, nskb->pp_recycle);
+		__skb_frag_ref(nskb_frag);
 		size = skb_frag_size(nskb_frag);
 
 		if (pos < offset) {
@@ -5941,8 +5977,10 @@ bool skb_try_coalesce(struct sk_buff *to, struct sk_buff *from,
 	/* if the skb is not cloned this does nothing
 	 * since we set nr_frags to 0.
 	 */
-	for (i = 0; i < from_shinfo->nr_frags; i++)
-		__skb_frag_ref(&from_shinfo->frags[i], from->pp_recycle);
+	if (skb_pp_frag_ref(from)) {
+		for (i = 0; i < from_shinfo->nr_frags; i++)
+			__skb_frag_ref(&from_shinfo->frags[i]);
+	}
 
 	to->truesize += delta;
 	to->len += len;
diff --git a/net/tls/tls_device_fallback.c b/net/tls/tls_device_fallback.c
index 9237dded4467..f9e3d3d90dcf 100644
--- a/net/tls/tls_device_fallback.c
+++ b/net/tls/tls_device_fallback.c
@@ -278,7 +278,7 @@ static int fill_sg_in(struct scatterlist *sg_in,
 	for (i = 0; remaining > 0; i++) {
 		skb_frag_t *frag = &record->frags[i];
 
-		__skb_frag_ref(frag, false);
+		__skb_frag_ref(frag);
 		sg_set_page(sg_in + i, skb_frag_page(frag),
 			    skb_frag_size(frag), skb_frag_off(frag));
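Worth noting (my observation, not from the commit message): this TLS caller always passed false, i.e. it never took page_pool references, so dropping the parameter leaves its behavior unchanged; only the helper's signature reverts.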