summaryrefslogtreecommitdiff
path: root/tools/lib/bpf
diff options
context:
space:
mode:
authorJakub Kicinski <kuba@kernel.org>2022-10-03 13:02:48 -0700
committerJakub Kicinski <kuba@kernel.org>2022-10-03 13:02:49 -0700
commita08d97a1935bee66b099b21feddad19c1fd90d0e (patch)
treeb11aaa5e616432f1012c7c738f12ab64dacef9b2 /tools/lib/bpf
parent62c07983bef9d3e78e71189441e1a470f0d1e653 (diff)
parent820dc0523e05c12810bb6bf4e56ce26e4c1948a2 (diff)
Merge https://git.kernel.org/pub/scm/linux/kernel/git/bpf/bpf-next
Daniel Borkmann says: ==================== pull-request: bpf-next 2022-10-03 We've added 143 non-merge commits during the last 27 day(s) which contain a total of 151 files changed, 8321 insertions(+), 1402 deletions(-). The main changes are: 1) Add kfuncs for PKCS#7 signature verification from BPF programs, from Roberto Sassu. 2) Add support for struct-based arguments for trampoline based BPF programs, from Yonghong Song. 3) Fix entry IP for kprobe-multi and trampoline probes under IBT enabled, from Jiri Olsa. 4) Batch of improvements to veristat selftest tool in particular to add CSV output, a comparison mode for CSV outputs and filtering, from Andrii Nakryiko. 5) Add preparatory changes needed for the BPF core for upcoming BPF HID support, from Benjamin Tissoires. 6) Support for direct writes to nf_conn's mark field from tc and XDP BPF program types, from Daniel Xu. 7) Initial batch of documentation improvements for BPF insn set spec, from Dave Thaler. 8) Add a new BPF_MAP_TYPE_USER_RINGBUF map which provides single-user-space-producer / single-kernel-consumer semantics for BPF ring buffer, from David Vernet. 9) Follow-up fixes to BPF allocator under RT to always use raw spinlock for the BPF hashtab's bucket lock, from Hou Tao. 10) Allow creating an iterator that loops through only the resources of one task/thread instead of all, from Kui-Feng Lee. 11) Add support for kptrs in the per-CPU arraymap, from Kumar Kartikeya Dwivedi. 12) Add a new kfunc helper for nf to set src/dst NAT IP/port in a newly allocated CT entry which is not yet inserted, from Lorenzo Bianconi. 13) Remove invalid recursion check for struct_ops for TCP congestion control BPF programs, from Martin KaFai Lau. 14) Fix W^X issue with BPF trampoline and BPF dispatcher, from Song Liu. 15) Fix percpu_counter leakage in BPF hashtab allocation error path, from Tetsuo Handa. 16) Various cleanups in BPF selftests to use preferred ASSERT_* macros, from Wang Yufen. 17) Add invocation for cgroup/connect{4,6} BPF programs for ICMP pings, from YiFei Zhu. 18) Lift blinding decision under bpf_jit_harden = 1 to bpf_capable(), from Yauheni Kaliuta. 19) Various libbpf fixes and cleanups including a libbpf NULL pointer deref, from Xin Liu. * https://git.kernel.org/pub/scm/linux/kernel/git/bpf/bpf-next: (143 commits) net: netfilter: move bpf_ct_set_nat_info kfunc in nf_nat_bpf.c Documentation: bpf: Add implementation notes documentations to table of contents bpf, docs: Delete misformatted table. selftests/xsk: Fix double free bpftool: Fix error message of strerror libbpf: Fix overrun in netlink attribute iteration selftests/bpf: Fix spelling mistake "unpriviledged" -> "unprivileged" samples/bpf: Fix typo in xdp_router_ipv4 sample bpftool: Remove unused struct event_ring_info bpftool: Remove unused struct btf_attach_point bpf, docs: Add TOC and fix formatting. bpf, docs: Add Clang note about BPF_ALU bpf, docs: Move Clang notes to a separate file bpf, docs: Linux byteswap note bpf, docs: Move legacy packet instructions to a separate file selftests/bpf: Check -EBUSY for the recurred bpf_setsockopt(TCP_CONGESTION) bpf: tcp: Stop bpf_setsockopt(TCP_CONGESTION) in init ops to recur itself bpf: Refactor bpf_setsockopt(TCP_CONGESTION) handling into another function bpf: Move the "cdg" tcp-cc check to the common sol_tcp_sockopt() bpf: Add __bpf_prog_{enter,exit}_struct_ops for struct_ops trampoline ... ==================== Link: https://lore.kernel.org/r/20221003194915.11847-1-daniel@iogearbox.net Signed-off-by: Jakub Kicinski <kuba@kernel.org>
Diffstat (limited to 'tools/lib/bpf')
-rw-r--r--tools/lib/bpf/bpf_helpers.h31
-rw-r--r--tools/lib/bpf/bpf_tracing.h107
-rw-r--r--tools/lib/bpf/btf.c32
-rw-r--r--tools/lib/bpf/btf.h25
-rw-r--r--tools/lib/bpf/btf_dump.c2
-rw-r--r--tools/lib/bpf/libbpf.c106
-rw-r--r--tools/lib/bpf/libbpf.h111
-rw-r--r--tools/lib/bpf/libbpf.map10
-rw-r--r--tools/lib/bpf/libbpf_probes.c1
-rw-r--r--tools/lib/bpf/libbpf_version.h2
-rw-r--r--tools/lib/bpf/nlattr.c2
-rw-r--r--tools/lib/bpf/ringbuf.c271
-rw-r--r--tools/lib/bpf/usdt.c2
13 files changed, 596 insertions, 106 deletions
diff --git a/tools/lib/bpf/bpf_helpers.h b/tools/lib/bpf/bpf_helpers.h
index 867b734839dd..d37c4fe2849d 100644
--- a/tools/lib/bpf/bpf_helpers.h
+++ b/tools/lib/bpf/bpf_helpers.h
@@ -131,7 +131,7 @@
/*
* Helper function to perform a tail call with a constant/immediate map slot.
*/
-#if (!defined(__clang__) || __clang_major__ >= 8) && defined(__bpf__)
+#if __clang_major__ >= 8 && defined(__bpf__)
static __always_inline void
bpf_tail_call_static(void *ctx, const void *map, const __u32 slot)
{
@@ -139,8 +139,8 @@ bpf_tail_call_static(void *ctx, const void *map, const __u32 slot)
__bpf_unreachable();
/*
- * Provide a hard guarantee that the compiler won't optimize setting r2
- * (map pointer) and r3 (constant map index) from _different paths_ ending
+ * Provide a hard guarantee that LLVM won't optimize setting r2 (map
+ * pointer) and r3 (constant map index) from _different paths_ ending
* up at the _same_ call insn as otherwise we won't be able to use the
* jmpq/nopl retpoline-free patching by the x86-64 JIT in the kernel
* given they mismatch. See also d2e4c1e6c294 ("bpf: Constant map key
@@ -148,37 +148,18 @@ bpf_tail_call_static(void *ctx, const void *map, const __u32 slot)
*
* Note on clobber list: we need to stay in-line with BPF calling
* convention, so even if we don't end up using r0, r4, r5, we need
- * to mark them as clobber so that the compiler doesn't end up using
- * them before / after the call.
+ * to mark them as clobber so that LLVM doesn't end up using them
+ * before / after the call.
*/
- asm volatile(
-#ifdef __clang__
- "r1 = %[ctx]\n\t"
+ asm volatile("r1 = %[ctx]\n\t"
"r2 = %[map]\n\t"
"r3 = %[slot]\n\t"
-#else
- "mov %%r1,%[ctx]\n\t"
- "mov %%r2,%[map]\n\t"
- "mov %%r3,%[slot]\n\t"
-#endif
"call 12"
:: [ctx]"r"(ctx), [map]"r"(map), [slot]"i"(slot)
: "r0", "r1", "r2", "r3", "r4", "r5");
}
#endif
-/*
- * Helper structure used by eBPF C program
- * to describe BPF map attributes to libbpf loader
- */
-struct bpf_map_def {
- unsigned int type;
- unsigned int key_size;
- unsigned int value_size;
- unsigned int max_entries;
- unsigned int map_flags;
-} __attribute__((deprecated("use BTF-defined maps in .maps section")));
-
enum libbpf_pin_type {
LIBBPF_PIN_NONE,
/* PIN_BY_NAME: pin maps by name (in /sys/fs/bpf by default) */
diff --git a/tools/lib/bpf/bpf_tracing.h b/tools/lib/bpf/bpf_tracing.h
index 5fdb93da423b..2972dc25ff72 100644
--- a/tools/lib/bpf/bpf_tracing.h
+++ b/tools/lib/bpf/bpf_tracing.h
@@ -438,6 +438,113 @@ typeof(name(0)) name(unsigned long long *ctx) \
static __always_inline typeof(name(0)) \
____##name(unsigned long long *ctx, ##args)
+#ifndef ___bpf_nth2
+#define ___bpf_nth2(_, _1, _2, _3, _4, _5, _6, _7, _8, _9, _10, _11, _12, _13, \
+ _14, _15, _16, _17, _18, _19, _20, _21, _22, _23, _24, N, ...) N
+#endif
+#ifndef ___bpf_narg2
+#define ___bpf_narg2(...) \
+ ___bpf_nth2(_, ##__VA_ARGS__, 12, 12, 11, 11, 10, 10, 9, 9, 8, 8, 7, 7, \
+ 6, 6, 5, 5, 4, 4, 3, 3, 2, 2, 1, 1, 0)
+#endif
+
+#define ___bpf_treg_cnt(t) \
+ __builtin_choose_expr(sizeof(t) == 1, 1, \
+ __builtin_choose_expr(sizeof(t) == 2, 1, \
+ __builtin_choose_expr(sizeof(t) == 4, 1, \
+ __builtin_choose_expr(sizeof(t) == 8, 1, \
+ __builtin_choose_expr(sizeof(t) == 16, 2, \
+ (void)0)))))
+
+#define ___bpf_reg_cnt0() (0)
+#define ___bpf_reg_cnt1(t, x) (___bpf_reg_cnt0() + ___bpf_treg_cnt(t))
+#define ___bpf_reg_cnt2(t, x, args...) (___bpf_reg_cnt1(args) + ___bpf_treg_cnt(t))
+#define ___bpf_reg_cnt3(t, x, args...) (___bpf_reg_cnt2(args) + ___bpf_treg_cnt(t))
+#define ___bpf_reg_cnt4(t, x, args...) (___bpf_reg_cnt3(args) + ___bpf_treg_cnt(t))
+#define ___bpf_reg_cnt5(t, x, args...) (___bpf_reg_cnt4(args) + ___bpf_treg_cnt(t))
+#define ___bpf_reg_cnt6(t, x, args...) (___bpf_reg_cnt5(args) + ___bpf_treg_cnt(t))
+#define ___bpf_reg_cnt7(t, x, args...) (___bpf_reg_cnt6(args) + ___bpf_treg_cnt(t))
+#define ___bpf_reg_cnt8(t, x, args...) (___bpf_reg_cnt7(args) + ___bpf_treg_cnt(t))
+#define ___bpf_reg_cnt9(t, x, args...) (___bpf_reg_cnt8(args) + ___bpf_treg_cnt(t))
+#define ___bpf_reg_cnt10(t, x, args...) (___bpf_reg_cnt9(args) + ___bpf_treg_cnt(t))
+#define ___bpf_reg_cnt11(t, x, args...) (___bpf_reg_cnt10(args) + ___bpf_treg_cnt(t))
+#define ___bpf_reg_cnt12(t, x, args...) (___bpf_reg_cnt11(args) + ___bpf_treg_cnt(t))
+#define ___bpf_reg_cnt(args...) ___bpf_apply(___bpf_reg_cnt, ___bpf_narg2(args))(args)
+
+#define ___bpf_union_arg(t, x, n) \
+ __builtin_choose_expr(sizeof(t) == 1, ({ union { __u8 z[1]; t x; } ___t = { .z = {ctx[n]}}; ___t.x; }), \
+ __builtin_choose_expr(sizeof(t) == 2, ({ union { __u16 z[1]; t x; } ___t = { .z = {ctx[n]} }; ___t.x; }), \
+ __builtin_choose_expr(sizeof(t) == 4, ({ union { __u32 z[1]; t x; } ___t = { .z = {ctx[n]} }; ___t.x; }), \
+ __builtin_choose_expr(sizeof(t) == 8, ({ union { __u64 z[1]; t x; } ___t = {.z = {ctx[n]} }; ___t.x; }), \
+ __builtin_choose_expr(sizeof(t) == 16, ({ union { __u64 z[2]; t x; } ___t = {.z = {ctx[n], ctx[n + 1]} }; ___t.x; }), \
+ (void)0)))))
+
+#define ___bpf_ctx_arg0(n, args...)
+#define ___bpf_ctx_arg1(n, t, x) , ___bpf_union_arg(t, x, n - ___bpf_reg_cnt1(t, x))
+#define ___bpf_ctx_arg2(n, t, x, args...) , ___bpf_union_arg(t, x, n - ___bpf_reg_cnt2(t, x, args)) ___bpf_ctx_arg1(n, args)
+#define ___bpf_ctx_arg3(n, t, x, args...) , ___bpf_union_arg(t, x, n - ___bpf_reg_cnt3(t, x, args)) ___bpf_ctx_arg2(n, args)
+#define ___bpf_ctx_arg4(n, t, x, args...) , ___bpf_union_arg(t, x, n - ___bpf_reg_cnt4(t, x, args)) ___bpf_ctx_arg3(n, args)
+#define ___bpf_ctx_arg5(n, t, x, args...) , ___bpf_union_arg(t, x, n - ___bpf_reg_cnt5(t, x, args)) ___bpf_ctx_arg4(n, args)
+#define ___bpf_ctx_arg6(n, t, x, args...) , ___bpf_union_arg(t, x, n - ___bpf_reg_cnt6(t, x, args)) ___bpf_ctx_arg5(n, args)
+#define ___bpf_ctx_arg7(n, t, x, args...) , ___bpf_union_arg(t, x, n - ___bpf_reg_cnt7(t, x, args)) ___bpf_ctx_arg6(n, args)
+#define ___bpf_ctx_arg8(n, t, x, args...) , ___bpf_union_arg(t, x, n - ___bpf_reg_cnt8(t, x, args)) ___bpf_ctx_arg7(n, args)
+#define ___bpf_ctx_arg9(n, t, x, args...) , ___bpf_union_arg(t, x, n - ___bpf_reg_cnt9(t, x, args)) ___bpf_ctx_arg8(n, args)
+#define ___bpf_ctx_arg10(n, t, x, args...) , ___bpf_union_arg(t, x, n - ___bpf_reg_cnt10(t, x, args)) ___bpf_ctx_arg9(n, args)
+#define ___bpf_ctx_arg11(n, t, x, args...) , ___bpf_union_arg(t, x, n - ___bpf_reg_cnt11(t, x, args)) ___bpf_ctx_arg10(n, args)
+#define ___bpf_ctx_arg12(n, t, x, args...) , ___bpf_union_arg(t, x, n - ___bpf_reg_cnt12(t, x, args)) ___bpf_ctx_arg11(n, args)
+#define ___bpf_ctx_arg(args...) ___bpf_apply(___bpf_ctx_arg, ___bpf_narg2(args))(___bpf_reg_cnt(args), args)
+
+#define ___bpf_ctx_decl0()
+#define ___bpf_ctx_decl1(t, x) , t x
+#define ___bpf_ctx_decl2(t, x, args...) , t x ___bpf_ctx_decl1(args)
+#define ___bpf_ctx_decl3(t, x, args...) , t x ___bpf_ctx_decl2(args)
+#define ___bpf_ctx_decl4(t, x, args...) , t x ___bpf_ctx_decl3(args)
+#define ___bpf_ctx_decl5(t, x, args...) , t x ___bpf_ctx_decl4(args)
+#define ___bpf_ctx_decl6(t, x, args...) , t x ___bpf_ctx_decl5(args)
+#define ___bpf_ctx_decl7(t, x, args...) , t x ___bpf_ctx_decl6(args)
+#define ___bpf_ctx_decl8(t, x, args...) , t x ___bpf_ctx_decl7(args)
+#define ___bpf_ctx_decl9(t, x, args...) , t x ___bpf_ctx_decl8(args)
+#define ___bpf_ctx_decl10(t, x, args...) , t x ___bpf_ctx_decl9(args)
+#define ___bpf_ctx_decl11(t, x, args...) , t x ___bpf_ctx_decl10(args)
+#define ___bpf_ctx_decl12(t, x, args...) , t x ___bpf_ctx_decl11(args)
+#define ___bpf_ctx_decl(args...) ___bpf_apply(___bpf_ctx_decl, ___bpf_narg2(args))(args)
+
+/*
+ * BPF_PROG2 is an enhanced version of BPF_PROG in order to handle struct
+ * arguments. Since each struct argument might take one or two u64 values
+ * in the trampoline stack, argument type size is needed to place proper number
+ * of u64 values for each argument. Therefore, BPF_PROG2 has different
+ * syntax from BPF_PROG. For example, for the following BPF_PROG syntax:
+ *
+ * int BPF_PROG(test2, int a, int b) { ... }
+ *
+ * the corresponding BPF_PROG2 syntax is:
+ *
+ * int BPF_PROG2(test2, int, a, int, b) { ... }
+ *
+ * where type and the corresponding argument name are separated by comma.
+ *
+ * Use BPF_PROG2 macro if one of the arguments might be a struct/union larger
+ * than 8 bytes:
+ *
+ * int BPF_PROG2(test_struct_arg, struct bpf_testmod_struct_arg_1, a, int, b,
+ * int, c, int, d, struct bpf_testmod_struct_arg_2, e, int, ret)
+ * {
+ * // access a, b, c, d, e, and ret directly
+ * ...
+ * }
+ */
+#define BPF_PROG2(name, args...) \
+name(unsigned long long *ctx); \
+static __always_inline typeof(name(0)) \
+____##name(unsigned long long *ctx ___bpf_ctx_decl(args)); \
+typeof(name(0)) name(unsigned long long *ctx) \
+{ \
+ return ____##name(ctx ___bpf_ctx_arg(args)); \
+} \
+static __always_inline typeof(name(0)) \
+____##name(unsigned long long *ctx ___bpf_ctx_decl(args))
+
struct pt_regs;
#define ___bpf_kprobe_args0() ctx
diff --git a/tools/lib/bpf/btf.c b/tools/lib/bpf/btf.c
index 361131518d63..d88647da2c7f 100644
--- a/tools/lib/bpf/btf.c
+++ b/tools/lib/bpf/btf.c
@@ -4642,20 +4642,17 @@ static int btf_dedup_remap_types(struct btf_dedup *d)
*/
struct btf *btf__load_vmlinux_btf(void)
{
- struct {
- const char *path_fmt;
- bool raw_btf;
- } locations[] = {
+ const char *locations[] = {
/* try canonical vmlinux BTF through sysfs first */
- { "/sys/kernel/btf/vmlinux", true /* raw BTF */ },
- /* fall back to trying to find vmlinux ELF on disk otherwise */
- { "/boot/vmlinux-%1$s" },
- { "/lib/modules/%1$s/vmlinux-%1$s" },
- { "/lib/modules/%1$s/build/vmlinux" },
- { "/usr/lib/modules/%1$s/kernel/vmlinux" },
- { "/usr/lib/debug/boot/vmlinux-%1$s" },
- { "/usr/lib/debug/boot/vmlinux-%1$s.debug" },
- { "/usr/lib/debug/lib/modules/%1$s/vmlinux" },
+ "/sys/kernel/btf/vmlinux",
+ /* fall back to trying to find vmlinux on disk otherwise */
+ "/boot/vmlinux-%1$s",
+ "/lib/modules/%1$s/vmlinux-%1$s",
+ "/lib/modules/%1$s/build/vmlinux",
+ "/usr/lib/modules/%1$s/kernel/vmlinux",
+ "/usr/lib/debug/boot/vmlinux-%1$s",
+ "/usr/lib/debug/boot/vmlinux-%1$s.debug",
+ "/usr/lib/debug/lib/modules/%1$s/vmlinux",
};
char path[PATH_MAX + 1];
struct utsname buf;
@@ -4665,15 +4662,12 @@ struct btf *btf__load_vmlinux_btf(void)
uname(&buf);
for (i = 0; i < ARRAY_SIZE(locations); i++) {
- snprintf(path, PATH_MAX, locations[i].path_fmt, buf.release);
+ snprintf(path, PATH_MAX, locations[i], buf.release);
- if (access(path, R_OK))
+ if (faccessat(AT_FDCWD, path, R_OK, AT_EACCESS))
continue;
- if (locations[i].raw_btf)
- btf = btf__parse_raw(path);
- else
- btf = btf__parse_elf(path, NULL);
+ btf = btf__parse(path, NULL);
err = libbpf_get_error(btf);
pr_debug("loading kernel BTF '%s': %d\n", path, err);
if (err)
diff --git a/tools/lib/bpf/btf.h b/tools/lib/bpf/btf.h
index ae543144ee30..8e6880d91c84 100644
--- a/tools/lib/bpf/btf.h
+++ b/tools/lib/bpf/btf.h
@@ -486,6 +486,8 @@ static inline struct btf_enum *btf_enum(const struct btf_type *t)
return (struct btf_enum *)(t + 1);
}
+struct btf_enum64;
+
static inline struct btf_enum64 *btf_enum64(const struct btf_type *t)
{
return (struct btf_enum64 *)(t + 1);
@@ -493,7 +495,28 @@ static inline struct btf_enum64 *btf_enum64(const struct btf_type *t)
static inline __u64 btf_enum64_value(const struct btf_enum64 *e)
{
- return ((__u64)e->val_hi32 << 32) | e->val_lo32;
+ /* struct btf_enum64 is introduced in Linux 6.0, which is very
+ * bleeding-edge. Here we are avoiding relying on struct btf_enum64
+ * definition coming from kernel UAPI headers to support wider range
+ * of system-wide kernel headers.
+ *
+ * Given this header can be also included from C++ applications, that
+ * further restricts C tricks we can use (like using compatible
+ * anonymous struct). So just treat struct btf_enum64 as
+ * a three-element array of u32 and access second (lo32) and third
+ * (hi32) elements directly.
+ *
+ * For reference, here is a struct btf_enum64 definition:
+ *
+ * const struct btf_enum64 {
+ * __u32 name_off;
+ * __u32 val_lo32;
+ * __u32 val_hi32;
+ * };
+ */
+ const __u32 *e64 = (const __u32 *)e;
+
+ return ((__u64)e64[2] << 32) | e64[1];
}
static inline struct btf_member *btf_members(const struct btf_type *t)
diff --git a/tools/lib/bpf/btf_dump.c b/tools/lib/bpf/btf_dump.c
index 627edb5bb6de..4221f73a74d0 100644
--- a/tools/lib/bpf/btf_dump.c
+++ b/tools/lib/bpf/btf_dump.c
@@ -2385,7 +2385,7 @@ int btf_dump__dump_type_data(struct btf_dump *d, __u32 id,
d->typed_dump->indent_lvl = OPTS_GET(opts, indent_level, 0);
/* default indent string is a tab */
- if (!opts->indent_str)
+ if (!OPTS_GET(opts, indent_str, NULL))
d->typed_dump->indent_str[0] = '\t';
else
libbpf_strlcpy(d->typed_dump->indent_str, opts->indent_str,
diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c
index 3ad139285fad..184ce1684dcd 100644
--- a/tools/lib/bpf/libbpf.c
+++ b/tools/lib/bpf/libbpf.c
@@ -163,6 +163,7 @@ static const char * const map_type_name[] = {
[BPF_MAP_TYPE_INODE_STORAGE] = "inode_storage",
[BPF_MAP_TYPE_TASK_STORAGE] = "task_storage",
[BPF_MAP_TYPE_BLOOM_FILTER] = "bloom_filter",
+ [BPF_MAP_TYPE_USER_RINGBUF] = "user_ringbuf",
};
static const char * const prog_type_name[] = {
@@ -883,7 +884,7 @@ __u32 get_kernel_version(void)
__u32 major, minor, patch;
struct utsname info;
- if (access(ubuntu_kver_file, R_OK) == 0) {
+ if (faccessat(AT_FDCWD, ubuntu_kver_file, R_OK, AT_EACCESS) == 0) {
FILE *f;
f = fopen(ubuntu_kver_file, "r");
@@ -2096,19 +2097,30 @@ static bool get_map_field_int(const char *map_name, const struct btf *btf,
return true;
}
+static int pathname_concat(char *buf, size_t buf_sz, const char *path, const char *name)
+{
+ int len;
+
+ len = snprintf(buf, buf_sz, "%s/%s", path, name);
+ if (len < 0)
+ return -EINVAL;
+ if (len >= buf_sz)
+ return -ENAMETOOLONG;
+
+ return 0;
+}
+
static int build_map_pin_path(struct bpf_map *map, const char *path)
{
char buf[PATH_MAX];
- int len;
+ int err;
if (!path)
path = "/sys/fs/bpf";
- len = snprintf(buf, PATH_MAX, "%s/%s", path, bpf_map__name(map));
- if (len < 0)
- return -EINVAL;
- else if (len >= PATH_MAX)
- return -ENAMETOOLONG;
+ err = pathname_concat(buf, sizeof(buf), path, bpf_map__name(map));
+ if (err)
+ return err;
return bpf_map__set_pin_path(map, buf);
}
@@ -2372,6 +2384,12 @@ static size_t adjust_ringbuf_sz(size_t sz)
return sz;
}
+static bool map_is_ringbuf(const struct bpf_map *map)
+{
+ return map->def.type == BPF_MAP_TYPE_RINGBUF ||
+ map->def.type == BPF_MAP_TYPE_USER_RINGBUF;
+}
+
static void fill_map_from_def(struct bpf_map *map, const struct btf_map_def *def)
{
map->def.type = def->map_type;
@@ -2386,7 +2404,7 @@ static void fill_map_from_def(struct bpf_map *map, const struct btf_map_def *def
map->btf_value_type_id = def->value_type_id;
/* auto-adjust BPF ringbuf map max_entries to be a multiple of page size */
- if (map->def.type == BPF_MAP_TYPE_RINGBUF)
+ if (map_is_ringbuf(map))
map->def.max_entries = adjust_ringbuf_sz(map->def.max_entries);
if (def->parts & MAP_DEF_MAP_TYPE)
@@ -4369,7 +4387,7 @@ int bpf_map__set_max_entries(struct bpf_map *map, __u32 max_entries)
map->def.max_entries = max_entries;
/* auto-adjust BPF ringbuf map max_entries to be a multiple of page size */
- if (map->def.type == BPF_MAP_TYPE_RINGBUF)
+ if (map_is_ringbuf(map))
map->def.max_entries = adjust_ringbuf_sz(map->def.max_entries);
return 0;
@@ -7961,17 +7979,9 @@ int bpf_object__pin_maps(struct bpf_object *obj, const char *path)
continue;
if (path) {
- int len;
-
- len = snprintf(buf, PATH_MAX, "%s/%s", path,
- bpf_map__name(map));
- if (len < 0) {
- err = -EINVAL;
- goto err_unpin_maps;
- } else if (len >= PATH_MAX) {
- err = -ENAMETOOLONG;
+ err = pathname_concat(buf, sizeof(buf), path, bpf_map__name(map));
+ if (err)
goto err_unpin_maps;
- }
sanitize_pin_path(buf);
pin_path = buf;
} else if (!map->pin_path) {
@@ -8009,14 +8019,9 @@ int bpf_object__unpin_maps(struct bpf_object *obj, const char *path)
char buf[PATH_MAX];
if (path) {
- int len;
-
- len = snprintf(buf, PATH_MAX, "%s/%s", path,
- bpf_map__name(map));
- if (len < 0)
- return libbpf_err(-EINVAL);
- else if (len >= PATH_MAX)
- return libbpf_err(-ENAMETOOLONG);
+ err = pathname_concat(buf, sizeof(buf), path, bpf_map__name(map));
+ if (err)
+ return libbpf_err(err);
sanitize_pin_path(buf);
pin_path = buf;
} else if (!map->pin_path) {
@@ -8034,6 +8039,7 @@ int bpf_object__unpin_maps(struct bpf_object *obj, const char *path)
int bpf_object__pin_programs(struct bpf_object *obj, const char *path)
{
struct bpf_program *prog;
+ char buf[PATH_MAX];
int err;
if (!obj)
@@ -8045,17 +8051,9 @@ int bpf_object__pin_programs(struct bpf_object *obj, const char *path)
}
bpf_object__for_each_program(prog, obj) {
- char buf[PATH_MAX];
- int len;
-
- len = snprintf(buf, PATH_MAX, "%s/%s", path, prog->name);
- if (len < 0) {
- err = -EINVAL;
- goto err_unpin_programs;
- } else if (len >= PATH_MAX) {
- err = -ENAMETOOLONG;
+ err = pathname_concat(buf, sizeof(buf), path, prog->name);
+ if (err)
goto err_unpin_programs;
- }
err = bpf_program__pin(prog, buf);
if (err)
@@ -8066,13 +8064,7 @@ int bpf_object__pin_programs(struct bpf_object *obj, const char *path)
err_unpin_programs:
while ((prog = bpf_object__prev_program(obj, prog))) {
- char buf[PATH_MAX];
- int len;
-
- len = snprintf(buf, PATH_MAX, "%s/%s", path, prog->name);
- if (len < 0)
- continue;
- else if (len >= PATH_MAX)
+ if (pathname_concat(buf, sizeof(buf), path, prog->name))
continue;
bpf_program__unpin(prog, buf);
@@ -8091,13 +8083,10 @@ int bpf_object__unpin_programs(struct bpf_object *obj, const char *path)
bpf_object__for_each_program(prog, obj) {
char buf[PATH_MAX];
- int len;
- len = snprintf(buf, PATH_MAX, "%s/%s", path, prog->name);
- if (len < 0)
- return libbpf_err(-EINVAL);
- else if (len >= PATH_MAX)
- return libbpf_err(-ENAMETOOLONG);
+ err = pathname_concat(buf, sizeof(buf), path, prog->name);
+ if (err)
+ return libbpf_err(err);
err = bpf_program__unpin(prog, buf);
if (err)
@@ -9084,11 +9073,15 @@ static int libbpf_find_attach_btf_id(struct bpf_program *prog, const char *attac
int err = 0;
/* BPF program's BTF ID */
- if (attach_prog_fd) {
+ if (prog->type == BPF_PROG_TYPE_EXT || attach_prog_fd) {
+ if (!attach_prog_fd) {
+ pr_warn("prog '%s': attach program FD is not set\n", prog->name);
+ return -EINVAL;
+ }
err = libbpf_find_prog_btf_id(attach_name, attach_prog_fd);
if (err < 0) {
- pr_warn("failed to find BPF program (FD %d) BTF ID for '%s': %d\n",
- attach_prog_fd, attach_name, err);
+ pr_warn("prog '%s': failed to find BPF program (FD %d) BTF ID for '%s': %d\n",
+ prog->name, attach_prog_fd, attach_name, err);
return err;
}
*btf_obj_fd = 0;
@@ -9105,7 +9098,8 @@ static int libbpf_find_attach_btf_id(struct bpf_program *prog, const char *attac
err = find_kernel_btf_id(prog->obj, attach_name, attach_type, btf_obj_fd, btf_type_id);
}
if (err) {
- pr_warn("failed to find kernel BTF type ID of '%s': %d\n", attach_name, err);
+ pr_warn("prog '%s': failed to find kernel BTF type ID of '%s': %d\n",
+ prog->name, attach_name, err);
return err;
}
return 0;
@@ -9910,7 +9904,7 @@ static bool use_debugfs(void)
static int has_debugfs = -1;
if (has_debugfs < 0)
- has_debugfs = access(DEBUGFS, F_OK) == 0;
+ has_debugfs = faccessat(AT_FDCWD, DEBUGFS, F_OK, AT_EACCESS) == 0;
return has_debugfs == 1;
}
@@ -10727,7 +10721,7 @@ static int resolve_full_path(const char *file, char *result, size_t result_sz)
continue;
snprintf(result, result_sz, "%.*s/%s", seg_len, s, file);
/* ensure it has required permissions */
- if (access(result, perm) < 0)
+ if (faccessat(AT_FDCWD, result, perm, AT_EACCESS) < 0)
continue;
pr_debug("resolved '%s' to '%s'\n", file, result);
return 0;
diff --git a/tools/lib/bpf/libbpf.h b/tools/lib/bpf/libbpf.h
index 88a1ac34b12a..eee883f007f9 100644
--- a/tools/lib/bpf/libbpf.h
+++ b/tools/lib/bpf/libbpf.h
@@ -118,7 +118,9 @@ struct bpf_object_open_opts {
* auto-pinned to that path on load; defaults to "/sys/fs/bpf".
*/
const char *pin_root_path;
- long :0;
+
+ __u32 :32; /* stub out now removed attach_prog_fd */
+
/* Additional kernel config content that augments and overrides
* system Kconfig for CONFIG_xxx externs.
*/
@@ -1011,6 +1013,7 @@ LIBBPF_API int bpf_tc_query(const struct bpf_tc_hook *hook,
/* Ring buffer APIs */
struct ring_buffer;
+struct user_ring_buffer;
typedef int (*ring_buffer_sample_fn)(void *ctx, void *data, size_t size);
@@ -1030,6 +1033,112 @@ LIBBPF_API int ring_buffer__poll(struct ring_buffer *rb, int timeout_ms);
LIBBPF_API int ring_buffer__consume(struct ring_buffer *rb);
LIBBPF_API int ring_buffer__epoll_fd(const struct ring_buffer *rb);
+struct user_ring_buffer_opts {
+ size_t sz; /* size of this struct, for forward/backward compatibility */
+};
+
+#define user_ring_buffer_opts__last_field sz
+
+/* @brief **user_ring_buffer__new()** creates a new instance of a user ring
+ * buffer.
+ *
+ * @param map_fd A file descriptor to a BPF_MAP_TYPE_USER_RINGBUF map.
+ * @param opts Options for how the ring buffer should be created.
+ * @return A user ring buffer on success; NULL and errno being set on a
+ * failure.
+ */
+LIBBPF_API struct user_ring_buffer *
+user_ring_buffer__new(int map_fd, const struct user_ring_buffer_opts *opts);
+
+/* @brief **user_ring_buffer__reserve()** reserves a pointer to a sample in the
+ * user ring buffer.
+ * @param rb A pointer to a user ring buffer.
+ * @param size The size of the sample, in bytes.
+ * @return A pointer to an 8-byte aligned reserved region of the user ring
+ * buffer; NULL, and errno being set if a sample could not be reserved.
+ *
+ * This function is *not* thread safe, and callers must synchronize accessing
+ * this function if there are multiple producers. If a size is requested that
+ * is larger than the size of the entire ring buffer, errno will be set to
+ * E2BIG and NULL is returned. If the ring buffer could accommodate the size,
+ * but currently does not have enough space, errno is set to ENOSPC and NULL is
+ * returned.
+ *
+ * After initializing the sample, callers must invoke
+ * **user_ring_buffer__submit()** to post the sample to the kernel. Otherwise,
+ * the sample must be freed with **user_ring_buffer__discard()**.
+ */
+LIBBPF_API void *user_ring_buffer__reserve(struct user_ring_buffer *rb, __u32 size);
+
+/* @brief **user_ring_buffer__reserve_blocking()** reserves a record in the
+ * ring buffer, possibly blocking for up to @timeout_ms until a sample becomes
+ * available.
+ * @param rb The user ring buffer.
+ * @param size The size of the sample, in bytes.
+ * @param timeout_ms The amount of time, in milliseconds, for which the caller
+ * should block when waiting for a sample. -1 causes the caller to block
+ * indefinitely.
+ * @return A pointer to an 8-byte aligned reserved region of the user ring
+ * buffer; NULL, and errno being set if a sample could not be reserved.
+ *
+ * This function is *not* thread safe, and callers must synchronize
+ * accessing this function if there are multiple producers
+ *
+ * If **timeout_ms** is -1, the function will block indefinitely until a sample
+ * becomes available. Otherwise, **timeout_ms** must be non-negative, or errno
+ * is set to EINVAL, and NULL is returned. If **timeout_ms** is 0, no blocking
+ * will occur and the function will return immediately after attempting to
+ * reserve a sample.
+ *
+ * If **size** is larger than the size of the entire ring buffer, errno is set
+ * to E2BIG and NULL is returned. If the ring buffer could accommodate
+ * **size**, but currently does not have enough space, the caller will block
+ * until at most **timeout_ms** has elapsed. If insufficient space is available
+ * at that time, errno is set to ENOSPC, and NULL is returned.
+ *
+ * The kernel guarantees that it will wake up this thread to check if
+ * sufficient space is available in the ring buffer at least once per
+ * invocation of the **bpf_ringbuf_drain()** helper function, provided that at
+ * least one sample is consumed, and the BPF program did not invoke the
+ * function with BPF_RB_NO_WAKEUP. A wakeup may occur sooner than that, but the
+ * kernel does not guarantee this. If the helper function is invoked with
+ * BPF_RB_FORCE_WAKEUP, a wakeup event will be sent even if no sample is
+ * consumed.
+ *
+ * When a sample of size **size** is found within **timeout_ms**, a pointer to
+ * the sample is returned. After initializing the sample, callers must invoke
+ * **user_ring_buffer__submit()** to post the sample to the ring buffer.
+ * Otherwise, the sample must be freed with **user_ring_buffer__discard()**.
+ */
+LIBBPF_API void *user_ring_buffer__reserve_blocking(struct user_ring_buffer *rb,
+ __u32 size,
+ int timeout_ms);
+
+/* @brief **user_ring_buffer__submit()** submits a previously reserved sample
+ * into the ring buffer.
+ * @param rb The user ring buffer.
+ * @param sample A reserved sample.
+ *
+ * It is not necessary to synchronize amongst multiple producers when invoking
+ * this function.
+ */
+LIBBPF_API void user_ring_buffer__submit(struct user_ring_buffer *rb, void *sample);
+
+/* @brief **user_ring_buffer__discard()** discards a previously reserved sample.
+ * @param rb The user ring buffer.
+ * @param sample A reserved sample.
+ *
+ * It is not necessary to synchronize amongst multiple producers when invoking
+ * this function.
+ */
+LIBBPF_API void user_ring_buffer__discard(struct user_ring_buffer *rb, void *sample);
+
+/* @brief **user_ring_buffer__free()** frees a ring buffer that was previously
+ * created with **user_ring_buffer__new()**.
+ * @param rb The user ring buffer being freed.
+ */
+LIBBPF_API void user_ring_buffer__free(struct user_ring_buffer *rb);
+
/* Perf buffer APIs */
struct perf_buffer;
diff --git a/tools/lib/bpf/libbpf.map b/tools/lib/bpf/libbpf.map
index 2b928dc21af0..c1d6aa7c82b6 100644
--- a/tools/lib/bpf/libbpf.map
+++ b/tools/lib/bpf/libbpf.map
@@ -368,3 +368,13 @@ LIBBPF_1.0.0 {
libbpf_bpf_prog_type_str;
perf_buffer__buffer;
};
+
+LIBBPF_1.1.0 {
+ global:
+ user_ring_buffer__discard;
+ user_ring_buffer__free;
+ user_ring_buffer__new;
+ user_ring_buffer__reserve;
+ user_ring_buffer__reserve_blocking;
+ user_ring_buffer__submit;
+} LIBBPF_1.0.0;
diff --git a/tools/lib/bpf/libbpf_probes.c b/tools/lib/bpf/libbpf_probes.c
index 6d495656f554..f3a8e8e74eb8 100644
--- a/tools/lib/bpf/libbpf_probes.c
+++ b/tools/lib/bpf/libbpf_probes.c
@@ -231,6 +231,7 @@ static int probe_map_create(enum bpf_map_type map_type)
return btf_fd;
break;
case BPF_MAP_TYPE_RINGBUF:
+ case BPF_MAP_TYPE_USER_RINGBUF:
key_size = 0;
value_size = 0;
max_entries = 4096;
diff --git a/tools/lib/bpf/libbpf_version.h b/tools/lib/bpf/libbpf_version.h
index 2fb2f4290080..e944f5bce728 100644
--- a/tools/lib/bpf/libbpf_version.h
+++ b/tools/lib/bpf/libbpf_version.h
@@ -4,6 +4,6 @@
#define __LIBBPF_VERSION_H
#define LIBBPF_MAJOR_VERSION 1
-#define LIBBPF_MINOR_VERSION 0
+#define LIBBPF_MINOR_VERSION 1
#endif /* __LIBBPF_VERSION_H */
diff --git a/tools/lib/bpf/nlattr.c b/tools/lib/bpf/nlattr.c
index f57e77a6e40f..3900d052ed19 100644
--- a/tools/lib/bpf/nlattr.c
+++ b/tools/lib/bpf/nlattr.c
@@ -32,7 +32,7 @@ static struct nlattr *nla_next(const struct nlattr *nla, int *remaining)
static int nla_ok(const struct nlattr *nla, int remaining)
{
- return remaining >= sizeof(*nla) &&
+ return remaining >= (int)sizeof(*nla) &&
nla->nla_len >= sizeof(*nla) &&
nla->nla_len <= remaining;
}
diff --git a/tools/lib/bpf/ringbuf.c b/tools/lib/bpf/ringbuf.c
index 8bc117bcc7bc..d285171d4b69 100644
--- a/tools/lib/bpf/ringbuf.c
+++ b/tools/lib/bpf/ringbuf.c
@@ -16,6 +16,7 @@
#include <asm/barrier.h>
#include <sys/mman.h>
#include <sys/epoll.h>
+#include <time.h>
#include "libbpf.h"
#include "libbpf_internal.h"
@@ -39,6 +40,23 @@ struct ring_buffer {
int ring_cnt;
};
+struct user_ring_buffer {
+ struct epoll_event event;
+ unsigned long *consumer_pos;
+ unsigned long *producer_pos;
+ void *data;
+ unsigned long mask;
+ size_t page_size;
+ int map_fd;
+ int epoll_fd;
+};
+
+/* 8-byte ring buffer header structure */
+struct ringbuf_hdr {
+ __u32 len;
+ __u32 pad;
+};
+
static void ringbuf_unmap_ring(struct ring_buffer *rb, struct ring *r)
{
if (r->consumer_pos) {
@@ -300,3 +318,256 @@ int ring_buffer__epoll_fd(const struct ring_buffer *rb)
{
return rb->epoll_fd;
}
+
+static void user_ringbuf_unmap_ring(struct user_ring_buffer *rb)
+{
+ if (rb->consumer_pos) {
+ munmap(rb->consumer_pos, rb->page_size);
+ rb->consumer_pos = NULL;
+ }
+ if (rb->producer_pos) {
+ munmap(rb->producer_pos, rb->page_size + 2 * (rb->mask + 1));
+ rb->producer_pos = NULL;
+ }
+}
+
+void user_ring_buffer__free(struct user_ring_buffer *rb)
+{
+ if (!rb)
+ return;
+
+ user_ringbuf_unmap_ring(rb);
+
+ if (rb->epoll_fd >= 0)
+ close(rb->epoll_fd);
+
+ free(rb);
+}
+
+static int user_ringbuf_map(struct user_ring_buffer *rb, int map_fd)
+{
+ struct bpf_map_info info;
+ __u32 len = sizeof(info);
+ void *tmp;
+ struct epoll_event *rb_epoll;
+ int err;
+
+ memset(&info, 0, sizeof(info));
+
+ err = bpf_obj_get_info_by_fd(map_fd, &info, &len);
+ if (err) {
+ err = -errno;
+ pr_warn("user ringbuf: failed to get map info for fd=%d: %d\n", map_fd, err);
+ return err;
+ }
+
+ if (info.type != BPF_MAP_TYPE_USER_RINGBUF) {
+ pr_warn("user ringbuf: map fd=%d is not BPF_MAP_TYPE_USER_RINGBUF\n", map_fd);
+ return -EINVAL;
+ }
+
+ rb->map_fd = map_fd;
+ rb->mask = info.max_entries - 1;
+
+ /* Map read-only consumer page */
+ tmp = mmap(NULL, rb->page_size, PROT_READ, MAP_SHARED, map_fd, 0);
+ if (tmp == MAP_FAILED) {
+ err = -errno;
+ pr_warn("user ringbuf: failed to mmap consumer page for map fd=%d: %d\n",
+ map_fd, err);
+ return err;
+ }
+ rb->consumer_pos = tmp;
+
+ /* Map read-write the producer page and data pages. We map the data
+ * region as twice the total size of the ring buffer to allow the
+ * simple reading and writing of samples that wrap around the end of
+ * the buffer. See the kernel implementation for details.
+ */
+ tmp = mmap(NULL, rb->page_size + 2 * info.max_entries,
+ PROT_READ | PROT_WRITE, MAP_SHARED, map_fd, rb->page_size);
+ if (tmp == MAP_FAILED) {
+ err = -errno;
+ pr_warn("user ringbuf: failed to mmap data pages for map fd=%d: %d\n",
+ map_fd, err);
+ return err;
+ }
+
+ rb->producer_pos = tmp;
+ rb->data = tmp + rb->page_size;
+
+ rb_epoll = &rb->event;
+ rb_epoll->events = EPOLLOUT;
+ if (epoll_ctl(rb->epoll_fd, EPOLL_CTL_ADD, map_fd, rb_epoll) < 0) {
+ err = -errno;
+ pr_warn("user ringbuf: failed to epoll add map fd=%d: %d\n", map_fd, err);
+ return err;
+ }
+
+ return 0;
+}
+
+struct user_ring_buffer *
+user_ring_buffer__new(int map_fd, const struct user_ring_buffer_opts *opts)
+{
+ struct user_ring_buffer *rb;
+ int err;
+
+ if (!OPTS_VALID(opts, user_ring_buffer_opts))
+ return errno = EINVAL, NULL;
+
+ rb = calloc(1, sizeof(*rb));
+ if (!rb)
+ return errno = ENOMEM, NULL;
+
+ rb->page_size = getpagesize();
+
+ rb->epoll_fd = epoll_create1(EPOLL_CLOEXEC);
+ if (rb->epoll_fd < 0) {
+ err = -errno;
+ pr_warn("user ringbuf: failed to create epoll instance: %d\n", err);
+ goto err_out;
+ }
+
+ err = user_ringbuf_map(rb, map_fd);
+ if (err)
+ goto err_out;
+
+ return rb;
+
+err_out:
+ user_ring_buffer__free(rb);
+ return errno = -err, NULL;
+}
+
+static void user_ringbuf_commit(struct user_ring_buffer *rb, void *sample, bool discard)
+{
+ __u32 new_len;
+ struct ringbuf_hdr *hdr;
+ uintptr_t hdr_offset;
+
+ hdr_offset = rb->mask + 1 + (sample - rb->data) - BPF_RINGBUF_HDR_SZ;
+ hdr = rb->data + (hdr_offset & rb->mask);
+
+ new_len = hdr->len & ~BPF_RINGBUF_BUSY_BIT;
+ if (discard)
+ new_len |= BPF_RINGBUF_DISCARD_BIT;
+
+ /* Synchronizes with smp_load_acquire() in __bpf_user_ringbuf_peek() in
+ * the kernel.
+ */
+ __atomic_exchange_n(&hdr->len, new_len, __ATOMIC_ACQ_REL);
+}
+
+void user_ring_buffer__discard(struct user_ring_buffer *rb, void *sample)
+{
+ user_ringbuf_commit(rb, sample, true);
+}
+
+void user_ring_buffer__submit(struct user_ring_buffer *rb, void *sample)
+{
+ user_ringbuf_commit(rb, sample, false);
+}
+
+void *user_ring_buffer__reserve(struct user_ring_buffer *rb, __u32 size)
+{
+ __u32 avail_size, total_size, max_size;
+ /* 64-bit to avoid overflow in case of extreme application behavior */
+ __u64 cons_pos, prod_pos;
+ struct ringbuf_hdr *hdr;
+
+ /* Synchronizes with smp_store_release() in __bpf_user_ringbuf_peek() in
+ * the kernel.
+ */
+ cons_pos = smp_load_acquire(rb->consumer_pos);
+ /* Synchronizes with smp_store_release() in user_ringbuf_commit() */
+ prod_pos = smp_load_acquire(rb->producer_pos);
+
+ max_size = rb->mask + 1;
+ avail_size = max_size - (prod_pos - cons_pos);
+ /* Round up total size to a multiple of 8. */
+ total_size = (size + BPF_RINGBUF_HDR_SZ + 7) / 8 * 8;
+
+ if (total_size > max_size)
+ return errno = E2BIG, NULL;
+
+ if (avail_size < total_size)
+ return errno = ENOSPC, NULL;
+
+ hdr = rb->data + (prod_pos & rb->mask);
+ hdr->len = size | BPF_RINGBUF_BUSY_BIT;
+ hdr->pad = 0;
+
+ /* Synchronizes with smp_load_acquire() in __bpf_user_ringbuf_peek() in
+ * the kernel.
+ */
+ smp_store_release(rb->producer_pos, prod_pos + total_size);
+
+ return (void *)rb->data + ((prod_pos + BPF_RINGBUF_HDR_SZ) & rb->mask);
+}
+
+static __u64 ns_elapsed_timespec(const struct timespec *start, const struct timespec *end)
+{
+ __u64 start_ns, end_ns, ns_per_s = 1000000000;
+
+ start_ns = (__u64)start->tv_sec * ns_per_s + start->tv_nsec;
+ end_ns = (__u64)end->tv_sec * ns_per_s + end->tv_nsec;
+
+ return end_ns - start_ns;
+}
+
+void *user_ring_buffer__reserve_blocking(struct user_ring_buffer *rb, __u32 size, int timeout_ms)
+{
+ void *sample;
+ int err, ms_remaining = timeout_ms;
+ struct timespec start;
+
+ if (timeout_ms < 0 && timeout_ms != -1)
+ return errno = EINVAL, NULL;
+
+ if (timeout_ms != -1) {
+ err = clock_gettime(CLOCK_MONOTONIC, &start);
+ if (err)
+ return NULL;
+ }
+
+ do {
+ int cnt, ms_elapsed;
+ struct timespec curr;
+ __u64 ns_per_ms = 1000000;
+
+ sample = user_ring_buffer__reserve(rb, size);
+ if (sample)
+ return sample;
+ else if (errno != ENOSPC)
+ return NULL;
+
+ /* The kernel guarantees at least one event notification
+ * delivery whenever at least one sample is drained from the
+ * ring buffer in an invocation to bpf_ringbuf_drain(). Other
+ * additional events may be delivered at any time, but only one
+ * event is guaranteed per bpf_ringbuf_drain() invocation,
+ * provided that a sample is drained, and the BPF program did
+ * not pass BPF_RB_NO_WAKEUP to bpf_ringbuf_drain(). If
+ * BPF_RB_FORCE_WAKEUP is passed to bpf_ringbuf_drain(), a
+ * wakeup event will be delivered even if no samples are
+ * drained.
+ */
+ cnt = epoll_wait(rb->epoll_fd, &rb->event, 1, ms_remaining);
+ if (cnt < 0)
+ return NULL;
+
+ if (timeout_ms == -1)
+ continue;
+
+ err = clock_gettime(CLOCK_MONOTONIC, &curr);
+ if (err)
+ return NULL;
+
+ ms_elapsed = ns_elapsed_timespec(&start, &curr) / ns_per_ms;
+ ms_remaining = timeout_ms - ms_elapsed;
+ } while (ms_remaining > 0);
+
+ /* Try one more time to reserve a sample after the specified timeout has elapsed. */
+ return user_ring_buffer__reserve(rb, size);
+}
diff --git a/tools/lib/bpf/usdt.c b/tools/lib/bpf/usdt.c
index d18e37982344..e83b497c2245 100644
--- a/tools/lib/bpf/usdt.c
+++ b/tools/lib/bpf/usdt.c
@@ -282,7 +282,7 @@ struct usdt_manager *usdt_manager_new(struct bpf_object *obj)
* If this is not supported, USDTs with semaphores will not be supported.
* Added in: a6ca88b241d5 ("trace_uprobe: support reference counter in fd-based uprobe")
*/
- man->has_sema_refcnt = access(ref_ctr_sysfs_path, F_OK) == 0;
+ man->has_sema_refcnt = faccessat(AT_FDCWD, ref_ctr_sysfs_path, F_OK, AT_EACCESS) == 0;
return man;
}