summaryrefslogtreecommitdiff
path: root/net
diff options
context:
space:
mode:
authorJakub Kicinski <kuba@kernel.org>2021-10-01 19:55:09 -0700
committerJakub Kicinski <kuba@kernel.org>2021-10-01 19:58:02 -0700
commit6b7b0c3091fd798ba35f11bbf04c4aefbd5ac4e6 (patch)
treef1f8d908160728e66adb3ab2348e754c72fb4b84 /net
parent20ab39d13e2e9f916cf570fc834f2cadd6e5dc4a (diff)
parentd636c8da2d60cc4841ebd7b6e6a02db5c33e11e4 (diff)
Merge https://git.kernel.org/pub/scm/linux/kernel/git/bpf/bpf-next
Daniel Borkmann says: ==================== bpf-next 2021-10-02 We've added 85 non-merge commits during the last 15 day(s) which contain a total of 132 files changed, 13779 insertions(+), 6724 deletions(-). The main changes are: 1) Massive update on test_bpf.ko coverage for JITs as preparatory work for an upcoming MIPS eBPF JIT, from Johan Almbladh. 2) Add a batched interface for RX buffer allocation in AF_XDP buffer pool, with driver support for i40e and ice from Magnus Karlsson. 3) Add legacy uprobe support to libbpf to complement recently merged legacy kprobe support, from Andrii Nakryiko. 4) Add bpf_trace_vprintk() as variadic printk helper, from Dave Marchevsky. 5) Support saving the register state in verifier when spilling <8byte bounded scalar to the stack, from Martin Lau. 6) Add libbpf opt-in for stricter BPF program section name handling as part of libbpf 1.0 effort, from Andrii Nakryiko. 7) Add a document to help clarifying BPF licensing, from Alexei Starovoitov. 8) Fix skel_internal.h to propagate errno if the loader indicates an internal error, from Kumar Kartikeya Dwivedi. 9) Fix build warnings with -Wcast-function-type so that the option can later be enabled by default for the kernel, from Kees Cook. 10) Fix libbpf to ignore STT_SECTION symbols in legacy map definitions as it otherwise errors out when encountering them, from Toke Høiland-Jørgensen. 11) Teach libbpf to recognize specialized maps (such as for perf RB) and internally remove BTF type IDs when creating them, from Hengqi Chen. 12) Various fixes and improvements to BPF selftests. ==================== Link: https://lore.kernel.org/r/20211002001327.15169-1-daniel@iogearbox.net Signed-off-by: Jakub Kicinski <kuba@kernel.org>
Diffstat (limited to 'net')
-rw-r--r--net/bpf/test_run.c6
-rw-r--r--net/xdp/xsk.c15
-rw-r--r--net/xdp/xsk_buff_pool.c132
-rw-r--r--net/xdp/xsk_queue.h12
4 files changed, 127 insertions, 38 deletions
diff --git a/net/bpf/test_run.c b/net/bpf/test_run.c
index bf1cb629f618..072f0c16c779 100644
--- a/net/bpf/test_run.c
+++ b/net/bpf/test_run.c
@@ -807,7 +807,8 @@ int bpf_prog_test_run_xdp(struct bpf_prog *prog, const union bpf_attr *kattr,
if (ret)
goto free_data;
- bpf_prog_change_xdp(NULL, prog);
+ if (repeat > 1)
+ bpf_prog_change_xdp(NULL, prog);
ret = bpf_test_run(prog, &xdp, repeat, &retval, &duration, true);
/* We convert the xdp_buff back to an xdp_md before checking the return
* code so the reference count of any held netdevice will be decremented
@@ -828,7 +829,8 @@ int bpf_prog_test_run_xdp(struct bpf_prog *prog, const union bpf_attr *kattr,
sizeof(struct xdp_md));
out:
- bpf_prog_change_xdp(prog, NULL);
+ if (repeat > 1)
+ bpf_prog_change_xdp(prog, NULL);
free_data:
kfree(data);
free_ctx:
diff --git a/net/xdp/xsk.c b/net/xdp/xsk.c
index d6b500dc4208..f16074eb53c7 100644
--- a/net/xdp/xsk.c
+++ b/net/xdp/xsk.c
@@ -134,21 +134,6 @@ int xsk_reg_pool_at_qid(struct net_device *dev, struct xsk_buff_pool *pool,
return 0;
}
-void xp_release(struct xdp_buff_xsk *xskb)
-{
- xskb->pool->free_heads[xskb->pool->free_heads_cnt++] = xskb;
-}
-
-static u64 xp_get_handle(struct xdp_buff_xsk *xskb)
-{
- u64 offset = xskb->xdp.data - xskb->xdp.data_hard_start;
-
- offset += xskb->pool->headroom;
- if (!xskb->pool->unaligned)
- return xskb->orig_addr + offset;
- return xskb->orig_addr + (offset << XSK_UNALIGNED_BUF_OFFSET_SHIFT);
-}
-
static int __xsk_rcv_zc(struct xdp_sock *xs, struct xdp_buff *xdp, u32 len)
{
struct xdp_buff_xsk *xskb = container_of(xdp, struct xdp_buff_xsk, xdp);
diff --git a/net/xdp/xsk_buff_pool.c b/net/xdp/xsk_buff_pool.c
index 8de01aaac4a0..90c4e1e819d3 100644
--- a/net/xdp/xsk_buff_pool.c
+++ b/net/xdp/xsk_buff_pool.c
@@ -44,12 +44,13 @@ void xp_destroy(struct xsk_buff_pool *pool)
struct xsk_buff_pool *xp_create_and_assign_umem(struct xdp_sock *xs,
struct xdp_umem *umem)
{
+ bool unaligned = umem->flags & XDP_UMEM_UNALIGNED_CHUNK_FLAG;
struct xsk_buff_pool *pool;
struct xdp_buff_xsk *xskb;
- u32 i;
+ u32 i, entries;
- pool = kvzalloc(struct_size(pool, free_heads, umem->chunks),
- GFP_KERNEL);
+ entries = unaligned ? umem->chunks : 0;
+ pool = kvzalloc(struct_size(pool, free_heads, entries), GFP_KERNEL);
if (!pool)
goto out;
@@ -63,7 +64,8 @@ struct xsk_buff_pool *xp_create_and_assign_umem(struct xdp_sock *xs,
pool->free_heads_cnt = umem->chunks;
pool->headroom = umem->headroom;
pool->chunk_size = umem->chunk_size;
- pool->unaligned = umem->flags & XDP_UMEM_UNALIGNED_CHUNK_FLAG;
+ pool->chunk_shift = ffs(umem->chunk_size) - 1;
+ pool->unaligned = unaligned;
pool->frame_len = umem->chunk_size - umem->headroom -
XDP_PACKET_HEADROOM;
pool->umem = umem;
@@ -81,7 +83,10 @@ struct xsk_buff_pool *xp_create_and_assign_umem(struct xdp_sock *xs,
xskb = &pool->heads[i];
xskb->pool = pool;
xskb->xdp.frame_sz = umem->chunk_size - umem->headroom;
- pool->free_heads[i] = xskb;
+ if (pool->unaligned)
+ pool->free_heads[i] = xskb;
+ else
+ xp_init_xskb_addr(xskb, pool, i * pool->chunk_size);
}
return pool;
@@ -406,6 +411,12 @@ int xp_dma_map(struct xsk_buff_pool *pool, struct device *dev,
if (pool->unaligned)
xp_check_dma_contiguity(dma_map);
+ else
+ for (i = 0; i < pool->heads_cnt; i++) {
+ struct xdp_buff_xsk *xskb = &pool->heads[i];
+
+ xp_init_xskb_dma(xskb, pool, dma_map->dma_pages, xskb->orig_addr);
+ }
err = xp_init_dma_info(pool, dma_map);
if (err) {
@@ -448,12 +459,9 @@ static struct xdp_buff_xsk *__xp_alloc(struct xsk_buff_pool *pool)
if (pool->free_heads_cnt == 0)
return NULL;
- xskb = pool->free_heads[--pool->free_heads_cnt];
-
for (;;) {
if (!xskq_cons_peek_addr_unchecked(pool->fq, &addr)) {
pool->fq->queue_empty_descs++;
- xp_release(xskb);
return NULL;
}
@@ -466,17 +474,17 @@ static struct xdp_buff_xsk *__xp_alloc(struct xsk_buff_pool *pool)
}
break;
}
- xskq_cons_release(pool->fq);
- xskb->orig_addr = addr;
- xskb->xdp.data_hard_start = pool->addrs + addr + pool->headroom;
- if (pool->dma_pages_cnt) {
- xskb->frame_dma = (pool->dma_pages[addr >> PAGE_SHIFT] &
- ~XSK_NEXT_PG_CONTIG_MASK) +
- (addr & ~PAGE_MASK);
- xskb->dma = xskb->frame_dma + pool->headroom +
- XDP_PACKET_HEADROOM;
+ if (pool->unaligned) {
+ xskb = pool->free_heads[--pool->free_heads_cnt];
+ xp_init_xskb_addr(xskb, pool, addr);
+ if (pool->dma_pages_cnt)
+ xp_init_xskb_dma(xskb, pool, pool->dma_pages, addr);
+ } else {
+ xskb = &pool->heads[xp_aligned_extract_idx(pool, addr)];
}
+
+ xskq_cons_release(pool->fq);
return xskb;
}
@@ -507,6 +515,96 @@ struct xdp_buff *xp_alloc(struct xsk_buff_pool *pool)
}
EXPORT_SYMBOL(xp_alloc);
+static u32 xp_alloc_new_from_fq(struct xsk_buff_pool *pool, struct xdp_buff **xdp, u32 max)
+{
+ u32 i, cached_cons, nb_entries;
+
+ if (max > pool->free_heads_cnt)
+ max = pool->free_heads_cnt;
+ max = xskq_cons_nb_entries(pool->fq, max);
+
+ cached_cons = pool->fq->cached_cons;
+ nb_entries = max;
+ i = max;
+ while (i--) {
+ struct xdp_buff_xsk *xskb;
+ u64 addr;
+ bool ok;
+
+ __xskq_cons_read_addr_unchecked(pool->fq, cached_cons++, &addr);
+
+ ok = pool->unaligned ? xp_check_unaligned(pool, &addr) :
+ xp_check_aligned(pool, &addr);
+ if (unlikely(!ok)) {
+ pool->fq->invalid_descs++;
+ nb_entries--;
+ continue;
+ }
+
+ if (pool->unaligned) {
+ xskb = pool->free_heads[--pool->free_heads_cnt];
+ xp_init_xskb_addr(xskb, pool, addr);
+ if (pool->dma_pages_cnt)
+ xp_init_xskb_dma(xskb, pool, pool->dma_pages, addr);
+ } else {
+ xskb = &pool->heads[xp_aligned_extract_idx(pool, addr)];
+ }
+
+ *xdp = &xskb->xdp;
+ xdp++;
+ }
+
+ xskq_cons_release_n(pool->fq, max);
+ return nb_entries;
+}
+
+static u32 xp_alloc_reused(struct xsk_buff_pool *pool, struct xdp_buff **xdp, u32 nb_entries)
+{
+ struct xdp_buff_xsk *xskb;
+ u32 i;
+
+ nb_entries = min_t(u32, nb_entries, pool->free_list_cnt);
+
+ i = nb_entries;
+ while (i--) {
+ xskb = list_first_entry(&pool->free_list, struct xdp_buff_xsk, free_list_node);
+ list_del(&xskb->free_list_node);
+
+ *xdp = &xskb->xdp;
+ xdp++;
+ }
+ pool->free_list_cnt -= nb_entries;
+
+ return nb_entries;
+}
+
+u32 xp_alloc_batch(struct xsk_buff_pool *pool, struct xdp_buff **xdp, u32 max)
+{
+ u32 nb_entries1 = 0, nb_entries2;
+
+ if (unlikely(pool->dma_need_sync)) {
+ /* Slow path */
+ *xdp = xp_alloc(pool);
+ return !!*xdp;
+ }
+
+ if (unlikely(pool->free_list_cnt)) {
+ nb_entries1 = xp_alloc_reused(pool, xdp, max);
+ if (nb_entries1 == max)
+ return nb_entries1;
+
+ max -= nb_entries1;
+ xdp += nb_entries1;
+ }
+
+ nb_entries2 = xp_alloc_new_from_fq(pool, xdp, max);
+ if (!nb_entries2)
+ pool->fq->queue_empty_descs++;
+
+ return nb_entries1 + nb_entries2;
+}
+EXPORT_SYMBOL(xp_alloc_batch);
+
bool xp_can_alloc(struct xsk_buff_pool *pool, u32 count)
{
if (pool->free_list_cnt >= count)
diff --git a/net/xdp/xsk_queue.h b/net/xdp/xsk_queue.h
index 9ae13cccfb28..e9aa2c236356 100644
--- a/net/xdp/xsk_queue.h
+++ b/net/xdp/xsk_queue.h
@@ -111,14 +111,18 @@ struct xsk_queue {
/* Functions that read and validate content from consumer rings. */
-static inline bool xskq_cons_read_addr_unchecked(struct xsk_queue *q, u64 *addr)
+static inline void __xskq_cons_read_addr_unchecked(struct xsk_queue *q, u32 cached_cons, u64 *addr)
{
struct xdp_umem_ring *ring = (struct xdp_umem_ring *)q->ring;
+ u32 idx = cached_cons & q->ring_mask;
- if (q->cached_cons != q->cached_prod) {
- u32 idx = q->cached_cons & q->ring_mask;
+ *addr = ring->desc[idx];
+}
- *addr = ring->desc[idx];
+static inline bool xskq_cons_read_addr_unchecked(struct xsk_queue *q, u64 *addr)
+{
+ if (q->cached_cons != q->cached_prod) {
+ __xskq_cons_read_addr_unchecked(q, q->cached_cons, addr);
return true;
}