| author | Mark Brown <broonie@kernel.org> | 2020-12-11 17:47:55 +0000 | 
|---|---|---|
| committer | Mark Brown <broonie@kernel.org> | 2020-12-11 17:47:55 +0000 | 
| commit | 031616c434db05ce766f76c62865f55698e0924f (patch) | |
| tree | 7f29aa1ff3e7b51a8058cd570fb785c6e769b245 /tools/lib/bpf | |
| parent | 064841ccfc49b2315dc0b797239862d3a343aa07 (diff) | |
| parent | 85a7555575a0e48f9b73db310d0d762a08a46d63 (diff) | |
Merge remote-tracking branch 'asoc/for-5.10' into asoc-linus
Diffstat (limited to 'tools/lib/bpf')
| -rw-r--r-- | tools/lib/bpf/Makefile | 28 |
| -rw-r--r-- | tools/lib/bpf/bpf.c | 70 |
| -rw-r--r-- | tools/lib/bpf/bpf.h | 39 |
| -rw-r--r-- | tools/lib/bpf/bpf_core_read.h | 120 |
| -rw-r--r-- | tools/lib/bpf/bpf_helpers.h | 51 |
| -rw-r--r-- | tools/lib/bpf/bpf_prog_linfo.c | 3 |
| -rw-r--r-- | tools/lib/bpf/bpf_tracing.h | 4 |
| -rw-r--r-- | tools/lib/bpf/btf.c | 1631 |
| -rw-r--r-- | tools/lib/bpf/btf.h | 103 |
| -rw-r--r-- | tools/lib/bpf/btf_dump.c | 87 |
| -rw-r--r-- | tools/lib/bpf/hashmap.c | 3 |
| -rw-r--r-- | tools/lib/bpf/hashmap.h | 12 |
| -rw-r--r-- | tools/lib/bpf/libbpf.c | 3405 |
| -rw-r--r-- | tools/lib/bpf/libbpf.h | 12 |
| -rw-r--r-- | tools/lib/bpf/libbpf.map | 38 |
| -rw-r--r-- | tools/lib/bpf/libbpf_common.h | 2 |
| -rw-r--r-- | tools/lib/bpf/libbpf_internal.h | 147 |
| -rw-r--r-- | tools/lib/bpf/libbpf_probes.c | 8 |
| -rw-r--r-- | tools/lib/bpf/netlink.c | 128 |
| -rw-r--r-- | tools/lib/bpf/nlattr.c | 9 |
| -rw-r--r-- | tools/lib/bpf/ringbuf.c | 8 |
| -rw-r--r-- | tools/lib/bpf/xsk.c | 383 |
| -rw-r--r-- | tools/lib/bpf/xsk.h | 9 |
23 files changed, 4727 insertions(+), 1573 deletions(-)
diff --git a/tools/lib/bpf/Makefile b/tools/lib/bpf/Makefile index 9ae8f4ef0aac..5f9abed3e226 100644 --- a/tools/lib/bpf/Makefile +++ b/tools/lib/bpf/Makefile @@ -1,6 +1,9 @@  # SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause)  # Most of this file is copied from tools/lib/traceevent/Makefile +RM ?= rm +srctree = $(abs_srctree) +  LIBBPF_VERSION := $(shell \  	grep -oE '^LIBBPF_([0-9.]+)' libbpf.map | \  	sort -rV | head -n1 | cut -d'_' -f2) @@ -56,7 +59,7 @@ ifndef VERBOSE  endif  FEATURE_USER = .libbpf -FEATURE_TESTS = libelf libelf-mmap zlib bpf reallocarray +FEATURE_TESTS = libelf zlib bpf  FEATURE_DISPLAY = libelf zlib bpf  INCLUDES = -I. -I$(srctree)/tools/include -I$(srctree)/tools/include/uapi @@ -95,27 +98,18 @@ PC_FILE		= libbpf.pc  ifdef EXTRA_CFLAGS    CFLAGS := $(EXTRA_CFLAGS)  else -  CFLAGS := -g -Wall -endif - -ifeq ($(feature-libelf-mmap), 1) -  override CFLAGS += -DHAVE_LIBELF_MMAP_SUPPORT -endif - -ifeq ($(feature-reallocarray), 0) -  override CFLAGS += -DCOMPAT_NEED_REALLOCARRAY +  CFLAGS := -g -O2  endif  # Append required CFLAGS -override CFLAGS += $(EXTRA_WARNINGS) +override CFLAGS += $(EXTRA_WARNINGS) -Wno-switch-enum  override CFLAGS += -Werror -Wall -override CFLAGS += -fPIC  override CFLAGS += $(INCLUDES)  override CFLAGS += -fvisibility=hidden  override CFLAGS += -D_LARGEFILE64_SOURCE -D_FILE_OFFSET_BITS=64  # flags specific for shared library -SHLIB_FLAGS := -DSHARED +SHLIB_FLAGS := -DSHARED -fPIC  ifeq ($(VERBOSE),1)    Q = @@ -197,7 +191,7 @@ $(OUTPUT)libbpf.so.$(LIBBPF_VERSION): $(BPF_IN_SHARED)  	@ln -sf $(@F) $(OUTPUT)libbpf.so.$(LIBBPF_MAJOR_VERSION)  $(OUTPUT)libbpf.a: $(BPF_IN_STATIC) -	$(QUIET_LINK)$(RM) $@; $(AR) rcs $@ $^ +	$(QUIET_LINK)$(RM) -f $@; $(AR) rcs $@ $^  $(OUTPUT)libbpf.pc:  	$(QUIET_GEN)sed -e "s|@PREFIX@|$(prefix)|" \ @@ -271,10 +265,10 @@ install: install_lib install_pkgconfig install_headers  ### Cleaning rules  config-clean: -	$(call QUIET_CLEAN, config) +	$(call QUIET_CLEAN, feature-detect)  	$(Q)$(MAKE) -C $(srctree)/tools/build/feature/ clean >/dev/null -clean: +clean: config-clean  	$(call QUIET_CLEAN, libbpf) $(RM) -rf $(CMD_TARGETS)		     \  		*~ .*.d .*.cmd LIBBPF-CFLAGS $(BPF_HELPER_DEFS)		     \  		$(SHARED_OBJDIR) $(STATIC_OBJDIR)			     \ @@ -301,7 +295,7 @@ cscope:  	cscope -b -q -I $(srctree)/include -f cscope.out  tags: -	rm -f TAGS tags +	$(RM) -f TAGS tags  	ls *.c *.h | xargs $(TAGS_PROG) -a  # Declare the contents of the .PHONY variable as phony.  We keep that diff --git a/tools/lib/bpf/bpf.c b/tools/lib/bpf/bpf.c index 0750681057c2..d27e34133973 100644 --- a/tools/lib/bpf/bpf.c +++ b/tools/lib/bpf/bpf.c @@ -32,9 +32,6 @@  #include "libbpf.h"  #include "libbpf_internal.h" -/* make sure libbpf doesn't use kernel-only integer typedefs */ -#pragma GCC poison u8 u16 u32 u64 s8 s16 s32 s64 -  /*   * When building perf, unistd.h is overridden. __NR_bpf is   * required to be defined explicitly. 
@@ -589,19 +586,31 @@ int bpf_link_create(int prog_fd, int target_fd,  		    enum bpf_attach_type attach_type,  		    const struct bpf_link_create_opts *opts)  { +	__u32 target_btf_id, iter_info_len;  	union bpf_attr attr;  	if (!OPTS_VALID(opts, bpf_link_create_opts))  		return -EINVAL; +	iter_info_len = OPTS_GET(opts, iter_info_len, 0); +	target_btf_id = OPTS_GET(opts, target_btf_id, 0); + +	if (iter_info_len && target_btf_id) +		return -EINVAL; +  	memset(&attr, 0, sizeof(attr));  	attr.link_create.prog_fd = prog_fd;  	attr.link_create.target_fd = target_fd;  	attr.link_create.attach_type = attach_type;  	attr.link_create.flags = OPTS_GET(opts, flags, 0); -	attr.link_create.iter_info = -		ptr_to_u64(OPTS_GET(opts, iter_info, (void *)0)); -	attr.link_create.iter_info_len = OPTS_GET(opts, iter_info_len, 0); + +	if (iter_info_len) { +		attr.link_create.iter_info = +			ptr_to_u64(OPTS_GET(opts, iter_info, (void *)0)); +		attr.link_create.iter_info_len = iter_info_len; +	} else if (target_btf_id) { +		attr.link_create.target_btf_id = target_btf_id; +	}  	return sys_bpf(BPF_LINK_CREATE, &attr, sizeof(attr));  } @@ -715,6 +724,37 @@ int bpf_prog_test_run_xattr(struct bpf_prog_test_run_attr *test_attr)  	return ret;  } +int bpf_prog_test_run_opts(int prog_fd, struct bpf_test_run_opts *opts) +{ +	union bpf_attr attr; +	int ret; + +	if (!OPTS_VALID(opts, bpf_test_run_opts)) +		return -EINVAL; + +	memset(&attr, 0, sizeof(attr)); +	attr.test.prog_fd = prog_fd; +	attr.test.cpu = OPTS_GET(opts, cpu, 0); +	attr.test.flags = OPTS_GET(opts, flags, 0); +	attr.test.repeat = OPTS_GET(opts, repeat, 0); +	attr.test.duration = OPTS_GET(opts, duration, 0); +	attr.test.ctx_size_in = OPTS_GET(opts, ctx_size_in, 0); +	attr.test.ctx_size_out = OPTS_GET(opts, ctx_size_out, 0); +	attr.test.data_size_in = OPTS_GET(opts, data_size_in, 0); +	attr.test.data_size_out = OPTS_GET(opts, data_size_out, 0); +	attr.test.ctx_in = ptr_to_u64(OPTS_GET(opts, ctx_in, NULL)); +	attr.test.ctx_out = ptr_to_u64(OPTS_GET(opts, ctx_out, NULL)); +	attr.test.data_in = ptr_to_u64(OPTS_GET(opts, data_in, NULL)); +	attr.test.data_out = ptr_to_u64(OPTS_GET(opts, data_out, NULL)); + +	ret = sys_bpf(BPF_PROG_TEST_RUN, &attr, sizeof(attr)); +	OPTS_SET(opts, data_size_out, attr.test.data_size_out); +	OPTS_SET(opts, ctx_size_out, attr.test.ctx_size_out); +	OPTS_SET(opts, duration, attr.test.duration); +	OPTS_SET(opts, retval, attr.test.retval); +	return ret; +} +  static int bpf_obj_get_next_id(__u32 start_id, __u32 *next_id, int cmd)  {  	union bpf_attr attr; @@ -818,7 +858,7 @@ int bpf_raw_tracepoint_open(const char *name, int prog_fd)  	return sys_bpf(BPF_RAW_TRACEPOINT_OPEN, &attr, sizeof(attr));  } -int bpf_load_btf(void *btf, __u32 btf_size, char *log_buf, __u32 log_buf_size, +int bpf_load_btf(const void *btf, __u32 btf_size, char *log_buf, __u32 log_buf_size,  		 bool do_log)  {  	union bpf_attr attr = {}; @@ -875,3 +915,19 @@ int bpf_enable_stats(enum bpf_stats_type type)  	return sys_bpf(BPF_ENABLE_STATS, &attr, sizeof(attr));  } + +int bpf_prog_bind_map(int prog_fd, int map_fd, +		      const struct bpf_prog_bind_opts *opts) +{ +	union bpf_attr attr; + +	if (!OPTS_VALID(opts, bpf_prog_bind_opts)) +		return -EINVAL; + +	memset(&attr, 0, sizeof(attr)); +	attr.prog_bind_map.prog_fd = prog_fd; +	attr.prog_bind_map.map_fd = map_fd; +	attr.prog_bind_map.flags = OPTS_GET(opts, flags, 0); + +	return sys_bpf(BPF_PROG_BIND_MAP, &attr, sizeof(attr)); +} diff --git a/tools/lib/bpf/bpf.h b/tools/lib/bpf/bpf.h index 015d13f25fcc..875dde20d56e 100644 
--- a/tools/lib/bpf/bpf.h +++ b/tools/lib/bpf/bpf.h @@ -174,8 +174,9 @@ struct bpf_link_create_opts {  	__u32 flags;  	union bpf_iter_link_info *iter_info;  	__u32 iter_info_len; +	__u32 target_btf_id;  }; -#define bpf_link_create_opts__last_field iter_info_len +#define bpf_link_create_opts__last_field target_btf_id  LIBBPF_API int bpf_link_create(int prog_fd, int target_fd,  			       enum bpf_attach_type attach_type, @@ -234,7 +235,7 @@ LIBBPF_API int bpf_prog_query(int target_fd, enum bpf_attach_type type,  			      __u32 query_flags, __u32 *attach_flags,  			      __u32 *prog_ids, __u32 *prog_cnt);  LIBBPF_API int bpf_raw_tracepoint_open(const char *name, int prog_fd); -LIBBPF_API int bpf_load_btf(void *btf, __u32 btf_size, char *log_buf, +LIBBPF_API int bpf_load_btf(const void *btf, __u32 btf_size, char *log_buf,  			    __u32 log_buf_size, bool do_log);  LIBBPF_API int bpf_task_fd_query(int pid, int fd, __u32 flags, char *buf,  				 __u32 *buf_len, __u32 *prog_id, __u32 *fd_type, @@ -243,6 +244,40 @@ LIBBPF_API int bpf_task_fd_query(int pid, int fd, __u32 flags, char *buf,  enum bpf_stats_type; /* defined in up-to-date linux/bpf.h */  LIBBPF_API int bpf_enable_stats(enum bpf_stats_type type); +struct bpf_prog_bind_opts { +	size_t sz; /* size of this struct for forward/backward compatibility */ +	__u32 flags; +}; +#define bpf_prog_bind_opts__last_field flags + +LIBBPF_API int bpf_prog_bind_map(int prog_fd, int map_fd, +				 const struct bpf_prog_bind_opts *opts); + +struct bpf_test_run_opts { +	size_t sz; /* size of this struct for forward/backward compatibility */ +	const void *data_in; /* optional */ +	void *data_out;      /* optional */ +	__u32 data_size_in; +	__u32 data_size_out; /* in: max length of data_out +			      * out: length of data_out +			      */ +	const void *ctx_in; /* optional */ +	void *ctx_out;      /* optional */ +	__u32 ctx_size_in; +	__u32 ctx_size_out; /* in: max length of ctx_out +			     * out: length of ctx_out +			     */ +	__u32 retval;        /* out: return code of the BPF program */ +	int repeat; +	__u32 duration;      /* out: average per repetition in ns */ +	__u32 flags; +	__u32 cpu; +}; +#define bpf_test_run_opts__last_field cpu + +LIBBPF_API int bpf_prog_test_run_opts(int prog_fd, +				      struct bpf_test_run_opts *opts); +  #ifdef __cplusplus  } /* extern "C" */  #endif diff --git a/tools/lib/bpf/bpf_core_read.h b/tools/lib/bpf/bpf_core_read.h index eae5cccff761..bbcefb3ff5a5 100644 --- a/tools/lib/bpf/bpf_core_read.h +++ b/tools/lib/bpf/bpf_core_read.h @@ -19,32 +19,52 @@ enum bpf_field_info_kind {  	BPF_FIELD_RSHIFT_U64 = 5,  }; +/* second argument to __builtin_btf_type_id() built-in */ +enum bpf_type_id_kind { +	BPF_TYPE_ID_LOCAL = 0,		/* BTF type ID in local program */ +	BPF_TYPE_ID_TARGET = 1,		/* BTF type ID in target kernel */ +}; + +/* second argument to __builtin_preserve_type_info() built-in */ +enum bpf_type_info_kind { +	BPF_TYPE_EXISTS = 0,		/* type existence in target kernel */ +	BPF_TYPE_SIZE = 1,		/* type size in target kernel */ +}; + +/* second argument to __builtin_preserve_enum_value() built-in */ +enum bpf_enum_value_kind { +	BPF_ENUMVAL_EXISTS = 0,		/* enum value existence in kernel */ +	BPF_ENUMVAL_VALUE = 1,		/* enum value value relocation */ +}; +  #define __CORE_RELO(src, field, info)					      \  	__builtin_preserve_field_info((src)->field, BPF_FIELD_##info)  #if __BYTE_ORDER == __LITTLE_ENDIAN  #define __CORE_BITFIELD_PROBE_READ(dst, src, fld)			      \ -	bpf_probe_read((void *)dst,					      \ -		       
__CORE_RELO(src, fld, BYTE_SIZE),		      \ -		       (const void *)src + __CORE_RELO(src, fld, BYTE_OFFSET)) +	bpf_probe_read_kernel(						      \ +			(void *)dst,				      \ +			__CORE_RELO(src, fld, BYTE_SIZE),		      \ +			(const void *)src + __CORE_RELO(src, fld, BYTE_OFFSET))  #else  /* semantics of LSHIFT_64 assumes loading values into low-ordered bytes, so   * for big-endian we need to adjust destination pointer accordingly, based on   * field byte size   */  #define __CORE_BITFIELD_PROBE_READ(dst, src, fld)			      \ -	bpf_probe_read((void *)dst + (8 - __CORE_RELO(src, fld, BYTE_SIZE)),  \ -		       __CORE_RELO(src, fld, BYTE_SIZE),		      \ -		       (const void *)src + __CORE_RELO(src, fld, BYTE_OFFSET)) +	bpf_probe_read_kernel(						      \ +			(void *)dst + (8 - __CORE_RELO(src, fld, BYTE_SIZE)), \ +			__CORE_RELO(src, fld, BYTE_SIZE),		      \ +			(const void *)src + __CORE_RELO(src, fld, BYTE_OFFSET))  #endif  /*   * Extract bitfield, identified by s->field, and return its value as u64.   * All this is done in relocatable manner, so bitfield changes such as   * signedness, bit size, offset changes, this will be handled automatically. - * This version of macro is using bpf_probe_read() to read underlying integer - * storage. Macro functions as an expression and its return type is - * bpf_probe_read()'s return value: 0, on success, <0 on error. + * This version of macro is using bpf_probe_read_kernel() to read underlying + * integer storage. Macro functions as an expression and its return type is + * bpf_probe_read_kernel()'s return value: 0, on success, <0 on error.   */  #define BPF_CORE_READ_BITFIELD_PROBED(s, field) ({			      \  	unsigned long long val = 0;					      \ @@ -92,15 +112,75 @@ enum bpf_field_info_kind {  	__builtin_preserve_field_info(field, BPF_FIELD_EXISTS)  /* - * Convenience macro to get byte size of a field. Works for integers, + * Convenience macro to get the byte size of a field. Works for integers,   * struct/unions, pointers, arrays, and enums.   */  #define bpf_core_field_size(field)					    \  	__builtin_preserve_field_info(field, BPF_FIELD_BYTE_SIZE)  /* - * bpf_core_read() abstracts away bpf_probe_read() call and captures offset - * relocation for source address using __builtin_preserve_access_index() + * Convenience macro to get BTF type ID of a specified type, using a local BTF + * information. Return 32-bit unsigned integer with type ID from program's own + * BTF. Always succeeds. + */ +#define bpf_core_type_id_local(type)					    \ +	__builtin_btf_type_id(*(typeof(type) *)0, BPF_TYPE_ID_LOCAL) + +/* + * Convenience macro to get BTF type ID of a target kernel's type that matches + * specified local type. + * Returns: + *    - valid 32-bit unsigned type ID in kernel BTF; + *    - 0, if no matching type was found in a target kernel BTF. + */ +#define bpf_core_type_id_kernel(type)					    \ +	__builtin_btf_type_id(*(typeof(type) *)0, BPF_TYPE_ID_TARGET) + +/* + * Convenience macro to check that provided named type + * (struct/union/enum/typedef) exists in a target kernel. + * Returns: + *    1, if such type is present in target kernel's BTF; + *    0, if no matching type is found. + */ +#define bpf_core_type_exists(type)					    \ +	__builtin_preserve_type_info(*(typeof(type) *)0, BPF_TYPE_EXISTS) + +/* + * Convenience macro to get the byte size of a provided named type + * (struct/union/enum/typedef) in a target kernel. + * Returns: + *    >= 0 size (in bytes), if type is present in target kernel's BTF; + *    0, if no matching type is found. 
+ */ +#define bpf_core_type_size(type)					    \ +	__builtin_preserve_type_info(*(typeof(type) *)0, BPF_TYPE_SIZE) + +/* + * Convenience macro to check that provided enumerator value is defined in + * a target kernel. + * Returns: + *    1, if specified enum type and its enumerator value are present in target + *    kernel's BTF; + *    0, if no matching enum and/or enum value within that enum is found. + */ +#define bpf_core_enum_value_exists(enum_type, enum_value)		    \ +	__builtin_preserve_enum_value(*(typeof(enum_type) *)enum_value, BPF_ENUMVAL_EXISTS) + +/* + * Convenience macro to get the integer value of an enumerator value in + * a target kernel. + * Returns: + *    64-bit value, if specified enum type and its enumerator value are + *    present in target kernel's BTF; + *    0, if no matching enum and/or enum value within that enum is found. + */ +#define bpf_core_enum_value(enum_type, enum_value)			    \ +	__builtin_preserve_enum_value(*(typeof(enum_type) *)enum_value, BPF_ENUMVAL_VALUE) + +/* + * bpf_core_read() abstracts away bpf_probe_read_kernel() call and captures + * offset relocation for source address using __builtin_preserve_access_index()   * built-in, provided by Clang.   *   * __builtin_preserve_access_index() takes as an argument an expression of @@ -115,8 +195,8 @@ enum bpf_field_info_kind {   * (local) BTF, used to record relocation.   */  #define bpf_core_read(dst, sz, src)					    \ -	bpf_probe_read(dst, sz,						    \ -		       (const void *)__builtin_preserve_access_index(src)) +	bpf_probe_read_kernel(dst, sz,					    \ +			      (const void *)__builtin_preserve_access_index(src))  /*   * bpf_core_read_str() is a thin wrapper around bpf_probe_read_str() @@ -124,8 +204,8 @@ enum bpf_field_info_kind {   * argument.   */  #define bpf_core_read_str(dst, sz, src)					    \ -	bpf_probe_read_str(dst, sz,					    \ -			   (const void *)__builtin_preserve_access_index(src)) +	bpf_probe_read_kernel_str(dst, sz,				    \ +				  (const void *)__builtin_preserve_access_index(src))  #define ___concat(a, b) a ## b  #define ___apply(fn, n) ___concat(fn, n) @@ -239,15 +319,17 @@ enum bpf_field_info_kind {   *	int x = BPF_CORE_READ(s, a.b.c, d.e, f, g);   *   * BPF_CORE_READ will decompose above statement into 4 bpf_core_read (BPF - * CO-RE relocatable bpf_probe_read() wrapper) calls, logically equivalent to: + * CO-RE relocatable bpf_probe_read_kernel() wrapper) calls, logically + * equivalent to:   * 1. const void *__t = s->a.b.c;   * 2. __t = __t->d.e;   * 3. __t = __t->f;   * 4. return __t->g;   *   * Equivalence is logical, because there is a heavy type casting/preservation - * involved, as well as all the reads are happening through bpf_probe_read() - * calls using __builtin_preserve_access_index() to emit CO-RE relocations. + * involved, as well as all the reads are happening through + * bpf_probe_read_kernel() calls using __builtin_preserve_access_index() to + * emit CO-RE relocations.   *   * N.B. Only up to 9 "field accessors" are supported, which should be more   * than enough for any practical purpose. 
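To make the new CO-RE conveniences concrete, here is a minimal sketch of a BPF program using them. It is illustrative and not part of this patch; it assumes a generated vmlinux.h, the libbpf headers patched above, and a clang recent enough to provide __builtin_btf_type_id() and __builtin_preserve_type_info().

```c
/* Illustrative only, not part of this patch: a task iterator
 * exercising the new CO-RE macros. Assumes vmlinux.h + recent clang.
 */
#include "vmlinux.h"
#include <bpf/bpf_helpers.h>
#include <bpf/bpf_core_read.h>

SEC("iter/task")
int dump_task(struct bpf_iter__task *ctx)
{
	struct task_struct *task = ctx->task;
	long state;

	if (!task)
		return 0;

	/* field existence is resolved at load time via CO-RE relocation */
	if (!bpf_core_field_exists(task->state))
		return 0;

	/* relocatable read; expands to bpf_probe_read_kernel() underneath */
	state = BPF_CORE_READ(task, state);

	/* bpf_core_type_id_kernel() yields 0 if the type is missing from
	 * the target kernel's BTF
	 */
	bpf_printk("task_struct btf id: %u, state: %ld",
		   bpf_core_type_id_kernel(struct task_struct), state);
	return 0;
}

char LICENSE[] SEC("license") = "GPL";
```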
diff --git a/tools/lib/bpf/bpf_helpers.h b/tools/lib/bpf/bpf_helpers.h index e9a4ecddb7a5..72b251110c4d 100644 --- a/tools/lib/bpf/bpf_helpers.h +++ b/tools/lib/bpf/bpf_helpers.h @@ -32,6 +32,9 @@  #ifndef __always_inline  #define __always_inline __attribute__((always_inline))  #endif +#ifndef __noinline +#define __noinline __attribute__((noinline)) +#endif  #ifndef __weak  #define __weak __attribute__((weak))  #endif @@ -51,6 +54,54 @@  #endif  /* + * Helper macro to throw a compilation error if __bpf_unreachable() gets + * built into the resulting code. This works given BPF back end does not + * implement __builtin_trap(). This is useful to assert that certain paths + * of the program code are never used and hence eliminated by the compiler. + * + * For example, consider a switch statement that covers known cases used by + * the program. __bpf_unreachable() can then reside in the default case. If + * the program gets extended such that a case is not covered in the switch + * statement, then it will throw a build error due to the default case not + * being compiled out. + */ +#ifndef __bpf_unreachable +# define __bpf_unreachable()	__builtin_trap() +#endif + +/* + * Helper function to perform a tail call with a constant/immediate map slot. + */ +#if __clang_major__ >= 8 && defined(__bpf__) +static __always_inline void +bpf_tail_call_static(void *ctx, const void *map, const __u32 slot) +{ +	if (!__builtin_constant_p(slot)) +		__bpf_unreachable(); + +	/* +	 * Provide a hard guarantee that LLVM won't optimize setting r2 (map +	 * pointer) and r3 (constant map index) from _different paths_ ending +	 * up at the _same_ call insn as otherwise we won't be able to use the +	 * jmpq/nopl retpoline-free patching by the x86-64 JIT in the kernel +	 * given they mismatch. See also d2e4c1e6c294 ("bpf: Constant map key +	 * tracking for prog array pokes") for details on verifier tracking. +	 * +	 * Note on clobber list: we need to stay in-line with BPF calling +	 * convention, so even if we don't end up using r0, r4, r5, we need +	 * to mark them as clobber so that LLVM doesn't end up using them +	 * before / after the call. 
+	 */ +	asm volatile("r1 = %[ctx]\n\t" +		     "r2 = %[map]\n\t" +		     "r3 = %[slot]\n\t" +		     "call 12" +		     :: [ctx]"r"(ctx), [map]"r"(map), [slot]"i"(slot) +		     : "r0", "r1", "r2", "r3", "r4", "r5"); +} +#endif + +/*   * Helper structure used by eBPF C program   * to describe BPF map attributes to libbpf loader   */ diff --git a/tools/lib/bpf/bpf_prog_linfo.c b/tools/lib/bpf/bpf_prog_linfo.c index bafca49cb1e6..3ed1a27b5f7c 100644 --- a/tools/lib/bpf/bpf_prog_linfo.c +++ b/tools/lib/bpf/bpf_prog_linfo.c @@ -8,9 +8,6 @@  #include "libbpf.h"  #include "libbpf_internal.h" -/* make sure libbpf doesn't use kernel-only integer typedefs */ -#pragma GCC poison u8 u16 u32 u64 s8 s16 s32 s64 -  struct bpf_prog_linfo {  	void *raw_linfo;  	void *raw_jited_linfo; diff --git a/tools/lib/bpf/bpf_tracing.h b/tools/lib/bpf/bpf_tracing.h index eebf020cbe3e..f9ef37707888 100644 --- a/tools/lib/bpf/bpf_tracing.h +++ b/tools/lib/bpf/bpf_tracing.h @@ -289,9 +289,9 @@ struct pt_regs;  #define BPF_KRETPROBE_READ_RET_IP		BPF_KPROBE_READ_RET_IP  #else  #define BPF_KPROBE_READ_RET_IP(ip, ctx)					    \ -	({ bpf_probe_read(&(ip), sizeof(ip), (void *)PT_REGS_RET(ctx)); }) +	({ bpf_probe_read_kernel(&(ip), sizeof(ip), (void *)PT_REGS_RET(ctx)); })  #define BPF_KRETPROBE_READ_RET_IP(ip, ctx)				    \ -	({ bpf_probe_read(&(ip), sizeof(ip),				    \ +	({ bpf_probe_read_kernel(&(ip), sizeof(ip),			    \  			  (void *)(PT_REGS_FP(ctx) + sizeof(ip))); })  #endif diff --git a/tools/lib/bpf/btf.c b/tools/lib/bpf/btf.c index 6bdbc389b493..231b07203e3d 100644 --- a/tools/lib/bpf/btf.c +++ b/tools/lib/bpf/btf.c @@ -1,6 +1,7 @@  // SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause)  /* Copyright (c) 2018 Facebook */ +#include <byteswap.h>  #include <endian.h>  #include <stdio.h>  #include <stdlib.h> @@ -21,26 +22,78 @@  #include "libbpf_internal.h"  #include "hashmap.h" -/* make sure libbpf doesn't use kernel-only integer typedefs */ -#pragma GCC poison u8 u16 u32 u64 s8 s16 s32 s64 -  #define BTF_MAX_NR_TYPES 0x7fffffffU  #define BTF_MAX_STR_OFFSET 0x7fffffffU  static struct btf_type btf_void;  struct btf { -	union { -		struct btf_header *hdr; -		void *data; -	}; -	struct btf_type **types; -	const char *strings; -	void *nohdr_data; +	/* raw BTF data in native endianness */ +	void *raw_data; +	/* raw BTF data in non-native endianness */ +	void *raw_data_swapped; +	__u32 raw_size; +	/* whether target endianness differs from the native one */ +	bool swapped_endian; + +	/* +	 * When BTF is loaded from an ELF or raw memory it is stored +	 * in a contiguous memory block. The hdr, type_data, and, strs_data +	 * point inside that memory region to their respective parts of BTF +	 * representation: +	 * +	 * +--------------------------------+ +	 * |  Header  |  Types  |  Strings  | +	 * +--------------------------------+ +	 * ^          ^         ^ +	 * |          |         | +	 * hdr        |         | +	 * types_data-+         | +	 * strs_data------------+ +	 * +	 * If BTF data is later modified, e.g., due to types added or +	 * removed, BTF deduplication performed, etc, this contiguous +	 * representation is broken up into three independently allocated +	 * memory regions to be able to modify them independently. 
+	 * raw_data is nulled out at that point, but can be later allocated +	 * and cached again if user calls btf__get_raw_data(), at which point +	 * raw_data will contain a contiguous copy of header, types, and +	 * strings: +	 * +	 * +----------+  +---------+  +-----------+ +	 * |  Header  |  |  Types  |  |  Strings  | +	 * +----------+  +---------+  +-----------+ +	 * ^             ^            ^ +	 * |             |            | +	 * hdr           |            | +	 * types_data----+            | +	 * strs_data------------------+ +	 * +	 *               +----------+---------+-----------+ +	 *               |  Header  |  Types  |  Strings  | +	 * raw_data----->+----------+---------+-----------+ +	 */ +	struct btf_header *hdr; + +	void *types_data; +	size_t types_data_cap; /* used size stored in hdr->type_len */ + +	/* type ID to `struct btf_type *` lookup index */ +	__u32 *type_offs; +	size_t type_offs_cap;  	__u32 nr_types; -	__u32 types_size; -	__u32 data_size; + +	void *strs_data; +	size_t strs_data_cap; /* used size stored in hdr->str_len */ + +	/* lookup index for each unique string in strings section */ +	struct hashmap *strs_hash; +	/* whether strings are already deduplicated */ +	bool strs_deduped; +	/* BTF object FD, if loaded into kernel */  	int fd; + +	/* Pointer size (in bytes) for a target architecture of this BTF */  	int ptr_sz;  }; @@ -49,60 +102,114 @@ static inline __u64 ptr_to_u64(const void *ptr)  	return (__u64) (unsigned long) ptr;  } -static int btf_add_type(struct btf *btf, struct btf_type *t) +/* Ensure given dynamically allocated memory region pointed to by *data* with + * capacity of *cap_cnt* elements each taking *elem_sz* bytes has enough + * memory to accommodate *add_cnt* new elements, assuming *cur_cnt* elements + * are already used. At most *max_cnt* elements can ever be allocated. + * If necessary, memory is reallocated and all existing data is copied over, + * new pointer to the memory region is stored at *data, new memory region + * capacity (in number of elements) is stored in *cap_cnt*. + * On success, memory pointer to the beginning of unused memory is returned. + * On error, NULL is returned.
+ */ +void *btf_add_mem(void **data, size_t *cap_cnt, size_t elem_sz, +		  size_t cur_cnt, size_t max_cnt, size_t add_cnt)  { -	if (btf->types_size - btf->nr_types < 2) { -		struct btf_type **new_types; -		__u32 expand_by, new_size; +	size_t new_cnt; +	void *new_data; -		if (btf->types_size == BTF_MAX_NR_TYPES) -			return -E2BIG; +	if (cur_cnt + add_cnt <= *cap_cnt) +		return *data + cur_cnt * elem_sz; -		expand_by = max(btf->types_size >> 2, 16U); -		new_size = min(BTF_MAX_NR_TYPES, btf->types_size + expand_by); +	/* requested more than the set limit */ +	if (cur_cnt + add_cnt > max_cnt) +		return NULL; -		new_types = realloc(btf->types, sizeof(*new_types) * new_size); -		if (!new_types) -			return -ENOMEM; +	new_cnt = *cap_cnt; +	new_cnt += new_cnt / 4;		  /* expand by 25% */ +	if (new_cnt < 16)		  /* but at least 16 elements */ +		new_cnt = 16; +	if (new_cnt > max_cnt)		  /* but not exceeding a set limit */ +		new_cnt = max_cnt; +	if (new_cnt < cur_cnt + add_cnt)  /* also ensure we have enough memory */ +		new_cnt = cur_cnt + add_cnt; -		if (btf->nr_types == 0) -			new_types[0] = &btf_void; +	new_data = libbpf_reallocarray(*data, new_cnt, elem_sz); +	if (!new_data) +		return NULL; -		btf->types = new_types; -		btf->types_size = new_size; -	} +	/* zero out newly allocated portion of memory */ +	memset(new_data + (*cap_cnt) * elem_sz, 0, (new_cnt - *cap_cnt) * elem_sz); -	btf->types[++(btf->nr_types)] = t; +	*data = new_data; +	*cap_cnt = new_cnt; +	return new_data + cur_cnt * elem_sz; +} +/* Ensure given dynamically allocated memory region has enough allocated space + * to accommodate *need_cnt* elements of size *elem_sz* bytes each + */ +int btf_ensure_mem(void **data, size_t *cap_cnt, size_t elem_sz, size_t need_cnt) +{ +	void *p; + +	if (need_cnt <= *cap_cnt) +		return 0; + +	p = btf_add_mem(data, cap_cnt, elem_sz, *cap_cnt, SIZE_MAX, need_cnt - *cap_cnt); +	if (!p) +		return -ENOMEM; + +	return 0; +} + +static int btf_add_type_idx_entry(struct btf *btf, __u32 type_off) +{ +	__u32 *p; + +	p = btf_add_mem((void **)&btf->type_offs, &btf->type_offs_cap, sizeof(__u32), +			btf->nr_types + 1, BTF_MAX_NR_TYPES, 1); +	if (!p) +		return -ENOMEM; + +	*p = type_off;  	return 0;  } +static void btf_bswap_hdr(struct btf_header *h) +{ +	h->magic = bswap_16(h->magic); +	h->hdr_len = bswap_32(h->hdr_len); +	h->type_off = bswap_32(h->type_off); +	h->type_len = bswap_32(h->type_len); +	h->str_off = bswap_32(h->str_off); +	h->str_len = bswap_32(h->str_len); +} +  static int btf_parse_hdr(struct btf *btf)  { -	const struct btf_header *hdr = btf->hdr; +	struct btf_header *hdr = btf->hdr;  	__u32 meta_left; -	if (btf->data_size < sizeof(struct btf_header)) { +	if (btf->raw_size < sizeof(struct btf_header)) {  		pr_debug("BTF header not found\n");  		return -EINVAL;  	} -	if (hdr->magic != BTF_MAGIC) { +	if (hdr->magic == bswap_16(BTF_MAGIC)) { +		btf->swapped_endian = true; +		if (bswap_32(hdr->hdr_len) != sizeof(struct btf_header)) { +			pr_warn("Can't load BTF with non-native endianness due to unsupported header length %u\n", +				bswap_32(hdr->hdr_len)); +			return -ENOTSUP; +		} +		btf_bswap_hdr(hdr); +	} else if (hdr->magic != BTF_MAGIC) {  		pr_debug("Invalid BTF magic:%x\n", hdr->magic);  		return -EINVAL;  	} -	if (hdr->version != BTF_VERSION) { -		pr_debug("Unsupported BTF version:%u\n", hdr->version); -		return -ENOTSUP; -	} - -	if (hdr->flags) { -		pr_debug("Unsupported BTF flags:%x\n", hdr->flags); -		return -ENOTSUP; -	} - -	meta_left = btf->data_size - sizeof(*hdr); +	meta_left = 
btf->raw_size - sizeof(*hdr);  	if (!meta_left) {  		pr_debug("BTF has no data\n");  		return -EINVAL; @@ -128,15 +235,13 @@ static int btf_parse_hdr(struct btf *btf)  		return -EINVAL;  	} -	btf->nohdr_data = btf->hdr + 1; -  	return 0;  }  static int btf_parse_str_sec(struct btf *btf)  {  	const struct btf_header *hdr = btf->hdr; -	const char *start = btf->nohdr_data + hdr->str_off; +	const char *start = btf->strs_data;  	const char *end = start + btf->hdr->str_len;  	if (!hdr->str_len || hdr->str_len - 1 > BTF_MAX_STR_OFFSET || @@ -145,14 +250,12 @@ static int btf_parse_str_sec(struct btf *btf)  		return -EINVAL;  	} -	btf->strings = start; -  	return 0;  } -static int btf_type_size(struct btf_type *t) +static int btf_type_size(const struct btf_type *t)  { -	int base_size = sizeof(struct btf_type); +	const int base_size = sizeof(struct btf_type);  	__u16 vlen = btf_vlen(t);  	switch (btf_kind(t)) { @@ -185,25 +288,120 @@ static int btf_type_size(struct btf_type *t)  	}  } +static void btf_bswap_type_base(struct btf_type *t) +{ +	t->name_off = bswap_32(t->name_off); +	t->info = bswap_32(t->info); +	t->type = bswap_32(t->type); +} + +static int btf_bswap_type_rest(struct btf_type *t) +{ +	struct btf_var_secinfo *v; +	struct btf_member *m; +	struct btf_array *a; +	struct btf_param *p; +	struct btf_enum *e; +	__u16 vlen = btf_vlen(t); +	int i; + +	switch (btf_kind(t)) { +	case BTF_KIND_FWD: +	case BTF_KIND_CONST: +	case BTF_KIND_VOLATILE: +	case BTF_KIND_RESTRICT: +	case BTF_KIND_PTR: +	case BTF_KIND_TYPEDEF: +	case BTF_KIND_FUNC: +		return 0; +	case BTF_KIND_INT: +		*(__u32 *)(t + 1) = bswap_32(*(__u32 *)(t + 1)); +		return 0; +	case BTF_KIND_ENUM: +		for (i = 0, e = btf_enum(t); i < vlen; i++, e++) { +			e->name_off = bswap_32(e->name_off); +			e->val = bswap_32(e->val); +		} +		return 0; +	case BTF_KIND_ARRAY: +		a = btf_array(t); +		a->type = bswap_32(a->type); +		a->index_type = bswap_32(a->index_type); +		a->nelems = bswap_32(a->nelems); +		return 0; +	case BTF_KIND_STRUCT: +	case BTF_KIND_UNION: +		for (i = 0, m = btf_members(t); i < vlen; i++, m++) { +			m->name_off = bswap_32(m->name_off); +			m->type = bswap_32(m->type); +			m->offset = bswap_32(m->offset); +		} +		return 0; +	case BTF_KIND_FUNC_PROTO: +		for (i = 0, p = btf_params(t); i < vlen; i++, p++) { +			p->name_off = bswap_32(p->name_off); +			p->type = bswap_32(p->type); +		} +		return 0; +	case BTF_KIND_VAR: +		btf_var(t)->linkage = bswap_32(btf_var(t)->linkage); +		return 0; +	case BTF_KIND_DATASEC: +		for (i = 0, v = btf_var_secinfos(t); i < vlen; i++, v++) { +			v->type = bswap_32(v->type); +			v->offset = bswap_32(v->offset); +			v->size = bswap_32(v->size); +		} +		return 0; +	default: +		pr_debug("Unsupported BTF_KIND:%u\n", btf_kind(t)); +		return -EINVAL; +	} +} +  static int btf_parse_type_sec(struct btf *btf)  {  	struct btf_header *hdr = btf->hdr; -	void *nohdr_data = btf->nohdr_data; -	void *next_type = nohdr_data + hdr->type_off; -	void *end_type = nohdr_data + hdr->str_off; +	void *next_type = btf->types_data; +	void *end_type = next_type + hdr->type_len; +	int err, i = 0, type_size; -	while (next_type < end_type) { -		struct btf_type *t = next_type; -		int type_size; -		int err; +	/* VOID (type_id == 0) is specially handled by btf__get_type_by_id(), +	 * so ensure we can never properly use its offset from index by +	 * setting it to a large value +	 */ +	err = btf_add_type_idx_entry(btf, UINT_MAX); +	if (err) +		return err; + +	while (next_type + sizeof(struct btf_type) <= end_type) { +		i++; -		type_size 
= btf_type_size(t); +		if (btf->swapped_endian) +			btf_bswap_type_base(next_type); + +		type_size = btf_type_size(next_type);  		if (type_size < 0)  			return type_size; -		next_type += type_size; -		err = btf_add_type(btf, t); +		if (next_type + type_size > end_type) { +			pr_warn("BTF type [%d] is malformed\n", i); +			return -EINVAL; +		} + +		if (btf->swapped_endian && btf_bswap_type_rest(next_type)) +			return -EINVAL; + +		err = btf_add_type_idx_entry(btf, next_type - btf->types_data);  		if (err)  			return err; + +		next_type += type_size; +		btf->nr_types++; +	} + +	if (next_type != end_type) { +		pr_warn("BTF types data is malformed\n"); +		return -EINVAL;  	}  	return 0; @@ -214,12 +412,20 @@ __u32 btf__get_nr_types(const struct btf *btf)  	return btf->nr_types;  } +/* internal helper returning non-const pointer to a type */ +static struct btf_type *btf_type_by_id(struct btf *btf, __u32 type_id) +{ +	if (type_id == 0) +		return &btf_void; + +	return btf->types_data + btf->type_offs[type_id]; +} +  const struct btf_type *btf__type_by_id(const struct btf *btf, __u32 type_id)  {  	if (type_id > btf->nr_types)  		return NULL; - -	return btf->types[type_id]; +	return btf_type_by_id((struct btf *)btf, type_id);  }  static int determine_ptr_size(const struct btf *btf) @@ -286,6 +492,38 @@ int btf__set_pointer_size(struct btf *btf, size_t ptr_sz)  	return 0;  } +static bool is_host_big_endian(void) +{ +#if __BYTE_ORDER == __LITTLE_ENDIAN +	return false; +#elif __BYTE_ORDER == __BIG_ENDIAN +	return true; +#else +# error "Unrecognized __BYTE_ORDER__" +#endif +} + +enum btf_endianness btf__endianness(const struct btf *btf) +{ +	if (is_host_big_endian()) +		return btf->swapped_endian ? BTF_LITTLE_ENDIAN : BTF_BIG_ENDIAN; +	else +		return btf->swapped_endian ? 
BTF_BIG_ENDIAN : BTF_LITTLE_ENDIAN; +} + +int btf__set_endianness(struct btf *btf, enum btf_endianness endian) +{ +	if (endian != BTF_LITTLE_ENDIAN && endian != BTF_BIG_ENDIAN) +		return -EINVAL; + +	btf->swapped_endian = is_host_big_endian() != (endian == BTF_BIG_ENDIAN); +	if (!btf->swapped_endian) { +		free(btf->raw_data_swapped); +		btf->raw_data_swapped = NULL; +	} +	return 0; +} +  static bool btf_type_is_void(const struct btf_type *t)  {  	return t == &btf_void || btf_is_fwd(t); @@ -417,7 +655,7 @@ __s32 btf__find_by_name(const struct btf *btf, const char *type_name)  		return 0;  	for (i = 1; i <= btf->nr_types; i++) { -		const struct btf_type *t = btf->types[i]; +		const struct btf_type *t = btf__type_by_id(btf, i);  		const char *name = btf__name_by_offset(btf, t->name_off);  		if (name && !strcmp(type_name, name)) @@ -436,7 +674,7 @@ __s32 btf__find_by_name_kind(const struct btf *btf, const char *type_name,  		return 0;  	for (i = 1; i <= btf->nr_types; i++) { -		const struct btf_type *t = btf->types[i]; +		const struct btf_type *t = btf__type_by_id(btf, i);  		const char *name;  		if (btf_kind(t) != kind) @@ -449,6 +687,11 @@ __s32 btf__find_by_name_kind(const struct btf *btf, const char *type_name,  	return -ENOENT;  } +static bool btf_is_modifiable(const struct btf *btf) +{ +	return (void *)btf->hdr != btf->raw_data; +} +  void btf__free(struct btf *btf)  {  	if (IS_ERR_OR_NULL(btf)) @@ -457,11 +700,55 @@ void btf__free(struct btf *btf)  	if (btf->fd >= 0)  		close(btf->fd); -	free(btf->data); -	free(btf->types); +	if (btf_is_modifiable(btf)) { +		/* if BTF was modified after loading, it will have a split +		 * in-memory representation for header, types, and strings +		 * sections, so we need to free all of them individually. It +		 * might still have a cached contiguous raw data present, +		 * which will be unconditionally freed below. 
+		 */ +		free(btf->hdr); +		free(btf->types_data); +		free(btf->strs_data); +	} +	free(btf->raw_data); +	free(btf->raw_data_swapped); +	free(btf->type_offs);  	free(btf);  } +struct btf *btf__new_empty(void) +{ +	struct btf *btf; + +	btf = calloc(1, sizeof(*btf)); +	if (!btf) +		return ERR_PTR(-ENOMEM); + +	btf->fd = -1; +	btf->ptr_sz = sizeof(void *); +	btf->swapped_endian = false; + +	/* +1 for empty string at offset 0 */ +	btf->raw_size = sizeof(struct btf_header) + 1; +	btf->raw_data = calloc(1, btf->raw_size); +	if (!btf->raw_data) { +		free(btf); +		return ERR_PTR(-ENOMEM); +	} + +	btf->hdr = btf->raw_data; +	btf->hdr->hdr_len = sizeof(struct btf_header); +	btf->hdr->magic = BTF_MAGIC; +	btf->hdr->version = BTF_VERSION; + +	btf->types_data = btf->raw_data + btf->hdr->hdr_len; +	btf->strs_data = btf->raw_data + btf->hdr->hdr_len; +	btf->hdr->str_len = 1; /* empty string at offset 0 */ + +	return btf; +} +  struct btf *btf__new(const void *data, __u32 size)  {  	struct btf *btf; @@ -471,26 +758,28 @@ struct btf *btf__new(const void *data, __u32 size)  	if (!btf)  		return ERR_PTR(-ENOMEM); -	btf->fd = -1; - -	btf->data = malloc(size); -	if (!btf->data) { +	btf->raw_data = malloc(size); +	if (!btf->raw_data) {  		err = -ENOMEM;  		goto done;  	} +	memcpy(btf->raw_data, data, size); +	btf->raw_size = size; -	memcpy(btf->data, data, size); -	btf->data_size = size; - +	btf->hdr = btf->raw_data;  	err = btf_parse_hdr(btf);  	if (err)  		goto done; +	btf->strs_data = btf->raw_data + btf->hdr->hdr_len + btf->hdr->str_off; +	btf->types_data = btf->raw_data + btf->hdr->hdr_len + btf->hdr->type_off; +  	err = btf_parse_str_sec(btf); +	err = err ?: btf_parse_type_sec(btf);  	if (err)  		goto done; -	err = btf_parse_type_sec(btf); +	btf->fd = -1;  done:  	if (err) { @@ -501,17 +790,6 @@ done:  	return btf;  } -static bool btf_check_endianness(const GElf_Ehdr *ehdr) -{ -#if __BYTE_ORDER == __LITTLE_ENDIAN -	return ehdr->e_ident[EI_DATA] == ELFDATA2LSB; -#elif __BYTE_ORDER == __BIG_ENDIAN -	return ehdr->e_ident[EI_DATA] == ELFDATA2MSB; -#else -# error "Unrecognized __BYTE_ORDER__" -#endif -} -  struct btf *btf__parse_elf(const char *path, struct btf_ext **btf_ext)  {  	Elf_Data *btf_data = NULL, *btf_ext_data = NULL; @@ -544,10 +822,6 @@ struct btf *btf__parse_elf(const char *path, struct btf_ext **btf_ext)  		pr_warn("failed to get EHDR from %s\n", path);  		goto done;  	} -	if (!btf_check_endianness(&ehdr)) { -		pr_warn("non-native ELF endianness is not supported\n"); -		goto done; -	}  	if (!elf_rawdata(elf_getscn(elf, ehdr.e_shstrndx), NULL)) {  		pr_warn("failed to get e_shstrndx from %s\n", path);  		goto done; @@ -659,13 +933,7 @@ struct btf *btf__parse_raw(const char *path)  		err = -EIO;  		goto err_out;  	} -	if (magic == __bswap_16(BTF_MAGIC)) { -		/* non-native endian raw BTF */ -		pr_warn("non-native BTF endianness is not supported\n"); -		err = -LIBBPF_ERRNO__ENDIAN; -		goto err_out; -	} -	if (magic != BTF_MAGIC) { +	if (magic != BTF_MAGIC && magic != bswap_16(BTF_MAGIC)) {  		/* definitely not a raw BTF */  		err = -EPROTO;  		goto err_out; @@ -798,7 +1066,7 @@ int btf__finalize_data(struct bpf_object *obj, struct btf *btf)  	__u32 i;  	for (i = 1; i <= btf->nr_types; i++) { -		struct btf_type *t = btf->types[i]; +		struct btf_type *t = btf_type_by_id(btf, i);  		/* Loader needs to fix up some of the things compiler  		 * couldn't get its hands on while emitting BTF. 
This @@ -815,10 +1083,13 @@ int btf__finalize_data(struct bpf_object *obj, struct btf *btf)  	return err;  } +static void *btf_get_raw_data(const struct btf *btf, __u32 *size, bool swap_endian); +  int btf__load(struct btf *btf)  { -	__u32 log_buf_size = 0; +	__u32 log_buf_size = 0, raw_size;  	char *log_buf = NULL; +	void *raw_data;  	int err = 0;  	if (btf->fd >= 0) @@ -833,8 +1104,16 @@ retry_load:  		*log_buf = 0;  	} -	btf->fd = bpf_load_btf(btf->data, btf->data_size, -			       log_buf, log_buf_size, false); +	raw_data = btf_get_raw_data(btf, &raw_size, false); +	if (!raw_data) { +		err = -ENOMEM; +		goto done; +	} +	/* cache native raw data representation */ +	btf->raw_size = raw_size; +	btf->raw_data = raw_data; + +	btf->fd = bpf_load_btf(raw_data, raw_size, log_buf, log_buf_size, false);  	if (btf->fd < 0) {  		if (!log_buf || errno == ENOSPC) {  			log_buf_size = max((__u32)BPF_LOG_BUF_SIZE, @@ -865,20 +1144,88 @@ void btf__set_fd(struct btf *btf, int fd)  	btf->fd = fd;  } -const void *btf__get_raw_data(const struct btf *btf, __u32 *size) +static void *btf_get_raw_data(const struct btf *btf, __u32 *size, bool swap_endian)  { -	*size = btf->data_size; -	return btf->data; +	struct btf_header *hdr = btf->hdr; +	struct btf_type *t; +	void *data, *p; +	__u32 data_sz; +	int i; + +	data = swap_endian ? btf->raw_data_swapped : btf->raw_data; +	if (data) { +		*size = btf->raw_size; +		return data; +	} + +	data_sz = hdr->hdr_len + hdr->type_len + hdr->str_len; +	data = calloc(1, data_sz); +	if (!data) +		return NULL; +	p = data; + +	memcpy(p, hdr, hdr->hdr_len); +	if (swap_endian) +		btf_bswap_hdr(p); +	p += hdr->hdr_len; + +	memcpy(p, btf->types_data, hdr->type_len); +	if (swap_endian) { +		for (i = 1; i <= btf->nr_types; i++) { +			t = p  + btf->type_offs[i]; +			/* btf_bswap_type_rest() relies on native t->info, so +			 * we swap base type info after we swapped all the +			 * additional information +			 */ +			if (btf_bswap_type_rest(t)) +				goto err_out; +			btf_bswap_type_base(t); +		} +	} +	p += hdr->type_len; + +	memcpy(p, btf->strs_data, hdr->str_len); +	p += hdr->str_len; + +	*size = data_sz; +	return data; +err_out: +	free(data); +	return NULL;  } -const char *btf__name_by_offset(const struct btf *btf, __u32 offset) +const void *btf__get_raw_data(const struct btf *btf_ro, __u32 *size) +{ +	struct btf *btf = (struct btf *)btf_ro; +	__u32 data_sz; +	void *data; + +	data = btf_get_raw_data(btf, &data_sz, btf->swapped_endian); +	if (!data) +		return NULL; + +	btf->raw_size = data_sz; +	if (btf->swapped_endian) +		btf->raw_data_swapped = data; +	else +		btf->raw_data = data; +	*size = data_sz; +	return data; +} + +const char *btf__str_by_offset(const struct btf *btf, __u32 offset)  {  	if (offset < btf->hdr->str_len) -		return &btf->strings[offset]; +		return btf->strs_data + offset;  	else  		return NULL;  } +const char *btf__name_by_offset(const struct btf *btf, __u32 offset) +{ +	return btf__str_by_offset(btf, offset); +} +  int btf__get_from_id(__u32 id, struct btf **btf)  {  	struct bpf_btf_info btf_info = { 0 }; @@ -1014,6 +1361,970 @@ int btf__get_map_kv_tids(const struct btf *btf, const char *map_name,  	return 0;  } +static size_t strs_hash_fn(const void *key, void *ctx) +{ +	struct btf *btf = ctx; +	const char *str = btf->strs_data + (long)key; + +	return str_hash(str); +} + +static bool strs_hash_equal_fn(const void *key1, const void *key2, void *ctx) +{ +	struct btf *btf = ctx; +	const char *str1 = btf->strs_data + (long)key1; +	const char *str2 = btf->strs_data + 
(long)key2; + +	return strcmp(str1, str2) == 0; +} + +static void btf_invalidate_raw_data(struct btf *btf) +{ +	if (btf->raw_data) { +		free(btf->raw_data); +		btf->raw_data = NULL; +	} +	if (btf->raw_data_swapped) { +		free(btf->raw_data_swapped); +		btf->raw_data_swapped = NULL; +	} +} + +/* Ensure BTF is ready to be modified (by splitting into a three memory + * regions for header, types, and strings). Also invalidate cached + * raw_data, if any. + */ +static int btf_ensure_modifiable(struct btf *btf) +{ +	void *hdr, *types, *strs, *strs_end, *s; +	struct hashmap *hash = NULL; +	long off; +	int err; + +	if (btf_is_modifiable(btf)) { +		/* any BTF modification invalidates raw_data */ +		btf_invalidate_raw_data(btf); +		return 0; +	} + +	/* split raw data into three memory regions */ +	hdr = malloc(btf->hdr->hdr_len); +	types = malloc(btf->hdr->type_len); +	strs = malloc(btf->hdr->str_len); +	if (!hdr || !types || !strs) +		goto err_out; + +	memcpy(hdr, btf->hdr, btf->hdr->hdr_len); +	memcpy(types, btf->types_data, btf->hdr->type_len); +	memcpy(strs, btf->strs_data, btf->hdr->str_len); + +	/* build lookup index for all strings */ +	hash = hashmap__new(strs_hash_fn, strs_hash_equal_fn, btf); +	if (IS_ERR(hash)) { +		err = PTR_ERR(hash); +		hash = NULL; +		goto err_out; +	} + +	strs_end = strs + btf->hdr->str_len; +	for (off = 0, s = strs; s < strs_end; off += strlen(s) + 1, s = strs + off) { +		/* hashmap__add() returns EEXIST if string with the same +		 * content already is in the hash map +		 */ +		err = hashmap__add(hash, (void *)off, (void *)off); +		if (err == -EEXIST) +			continue; /* duplicate */ +		if (err) +			goto err_out; +	} + +	/* only when everything was successful, update internal state */ +	btf->hdr = hdr; +	btf->types_data = types; +	btf->types_data_cap = btf->hdr->type_len; +	btf->strs_data = strs; +	btf->strs_data_cap = btf->hdr->str_len; +	btf->strs_hash = hash; +	/* if BTF was created from scratch, all strings are guaranteed to be +	 * unique and deduplicated +	 */ +	btf->strs_deduped = btf->hdr->str_len <= 1; + +	/* invalidate raw_data representation */ +	btf_invalidate_raw_data(btf); + +	return 0; + +err_out: +	hashmap__free(hash); +	free(hdr); +	free(types); +	free(strs); +	return -ENOMEM; +} + +static void *btf_add_str_mem(struct btf *btf, size_t add_sz) +{ +	return btf_add_mem(&btf->strs_data, &btf->strs_data_cap, 1, +			   btf->hdr->str_len, BTF_MAX_STR_OFFSET, add_sz); +} + +/* Find an offset in BTF string section that corresponds to a given string *s*. + * Returns: + *   - >0 offset into string section, if string is found; + *   - -ENOENT, if string is not in the string section; + *   - <0, on any other error. + */ +int btf__find_str(struct btf *btf, const char *s) +{ +	long old_off, new_off, len; +	void *p; + +	/* BTF needs to be in a modifiable state to build string lookup index */ +	if (btf_ensure_modifiable(btf)) +		return -ENOMEM; + +	/* see btf__add_str() for why we do this */ +	len = strlen(s) + 1; +	p = btf_add_str_mem(btf, len); +	if (!p) +		return -ENOMEM; + +	new_off = btf->hdr->str_len; +	memcpy(p, s, len); + +	if (hashmap__find(btf->strs_hash, (void *)new_off, (void **)&old_off)) +		return old_off; + +	return -ENOENT; +} + +/* Add a string s to the BTF string section. + * Returns: + *   - > 0 offset into string section, on success; + *   - < 0, on error. 
+ */ +int btf__add_str(struct btf *btf, const char *s) +{ +	long old_off, new_off, len; +	void *p; +	int err; + +	if (btf_ensure_modifiable(btf)) +		return -ENOMEM; + +	/* Hashmap keys are always offsets within btf->strs_data, so to even +	 * look up some string from the "outside", we need to first append it +	 * at the end, so that it can be addressed with an offset. Luckily, +	 * until btf->hdr->str_len is incremented, that string is just a piece +	 * of garbage for the rest of BTF code, so no harm, no foul. On the +	 * other hand, if the string is unique, it's already appended and +	 * ready to be used, only a simple btf->hdr->str_len increment away. +	 */ +	len = strlen(s) + 1; +	p = btf_add_str_mem(btf, len); +	if (!p) +		return -ENOMEM; + +	new_off = btf->hdr->str_len; +	memcpy(p, s, len); + +	/* Now attempt to add the string, but only if the string with the same +	 * contents doesn't exist already (HASHMAP_ADD strategy). If such +	 * string exists, we'll get its offset in old_off (that's old_key). +	 */ +	err = hashmap__insert(btf->strs_hash, (void *)new_off, (void *)new_off, +			      HASHMAP_ADD, (const void **)&old_off, NULL); +	if (err == -EEXIST) +		return old_off; /* duplicated string, return existing offset */ +	if (err) +		return err; + +	btf->hdr->str_len += len; /* new unique string, adjust data length */ +	return new_off; +} + +static void *btf_add_type_mem(struct btf *btf, size_t add_sz) +{ +	return btf_add_mem(&btf->types_data, &btf->types_data_cap, 1, +			   btf->hdr->type_len, UINT_MAX, add_sz); +} + +static __u32 btf_type_info(int kind, int vlen, int kflag) +{ +	return (kflag << 31) | (kind << 24) | vlen; +} + +static void btf_type_inc_vlen(struct btf_type *t) +{ +	t->info = btf_type_info(btf_kind(t), btf_vlen(t) + 1, btf_kflag(t)); +} + +/* + * Append new BTF_KIND_INT type with: + *   - *name* - non-empty, non-NULL type name; + *   - *sz* - power-of-2 (1, 2, 4, ..) size of the type, in bytes; + *   - encoding is a combination of BTF_INT_SIGNED, BTF_INT_CHAR, BTF_INT_BOOL. + * Returns: + *   - >0, type ID of newly added BTF type; + *   - <0, on error. 
+ */ +int btf__add_int(struct btf *btf, const char *name, size_t byte_sz, int encoding) +{ +	struct btf_type *t; +	int sz, err, name_off; + +	/* non-empty name */ +	if (!name || !name[0]) +		return -EINVAL; +	/* byte_sz must be power of 2 */ +	if (!byte_sz || (byte_sz & (byte_sz - 1)) || byte_sz > 16) +		return -EINVAL; +	if (encoding & ~(BTF_INT_SIGNED | BTF_INT_CHAR | BTF_INT_BOOL)) +		return -EINVAL; + +	/* deconstruct BTF, if necessary, and invalidate raw_data */ +	if (btf_ensure_modifiable(btf)) +		return -ENOMEM; + +	sz = sizeof(struct btf_type) + sizeof(int); +	t = btf_add_type_mem(btf, sz); +	if (!t) +		return -ENOMEM; + +	/* if something goes wrong later, we might end up with an extra string, +	 * but that shouldn't be a problem, because BTF can't be constructed +	 * completely anyway and will most probably be just discarded +	 */ +	name_off = btf__add_str(btf, name); +	if (name_off < 0) +		return name_off; + +	t->name_off = name_off; +	t->info = btf_type_info(BTF_KIND_INT, 0, 0); +	t->size = byte_sz; +	/* set INT info, we don't allow setting legacy bit offset/size */ +	*(__u32 *)(t + 1) = (encoding << 24) | (byte_sz * 8); + +	err = btf_add_type_idx_entry(btf, btf->hdr->type_len); +	if (err) +		return err; + +	btf->hdr->type_len += sz; +	btf->hdr->str_off += sz; +	btf->nr_types++; +	return btf->nr_types; +} + +/* it's completely legal to append BTF types with type IDs pointing forward to + * types that haven't been appended yet, so we only make sure that id looks + * sane, we can't guarantee that ID will always be valid + */ +static int validate_type_id(int id) +{ +	if (id < 0 || id > BTF_MAX_NR_TYPES) +		return -EINVAL; +	return 0; +} + +/* generic append function for PTR, TYPEDEF, CONST/VOLATILE/RESTRICT */ +static int btf_add_ref_kind(struct btf *btf, int kind, const char *name, int ref_type_id) +{ +	struct btf_type *t; +	int sz, name_off = 0, err; + +	if (validate_type_id(ref_type_id)) +		return -EINVAL; + +	if (btf_ensure_modifiable(btf)) +		return -ENOMEM; + +	sz = sizeof(struct btf_type); +	t = btf_add_type_mem(btf, sz); +	if (!t) +		return -ENOMEM; + +	if (name && name[0]) { +		name_off = btf__add_str(btf, name); +		if (name_off < 0) +			return name_off; +	} + +	t->name_off = name_off; +	t->info = btf_type_info(kind, 0, 0); +	t->type = ref_type_id; + +	err = btf_add_type_idx_entry(btf, btf->hdr->type_len); +	if (err) +		return err; + +	btf->hdr->type_len += sz; +	btf->hdr->str_off += sz; +	btf->nr_types++; +	return btf->nr_types; +} + +/* + * Append new BTF_KIND_PTR type with: + *   - *ref_type_id* - referenced type ID, it might not exist yet; + * Returns: + *   - >0, type ID of newly added BTF type; + *   - <0, on error. + */ +int btf__add_ptr(struct btf *btf, int ref_type_id) +{ +	return btf_add_ref_kind(btf, BTF_KIND_PTR, NULL, ref_type_id); +} + +/* + * Append new BTF_KIND_ARRAY type with: + *   - *index_type_id* - type ID of the type describing array index; + *   - *elem_type_id* - type ID of the type describing array element; + *   - *nr_elems* - the size of the array; + * Returns: + *   - >0, type ID of newly added BTF type; + *   - <0, on error. 
+ */ +int btf__add_array(struct btf *btf, int index_type_id, int elem_type_id, __u32 nr_elems) +{ +	struct btf_type *t; +	struct btf_array *a; +	int sz, err; + +	if (validate_type_id(index_type_id) || validate_type_id(elem_type_id)) +		return -EINVAL; + +	if (btf_ensure_modifiable(btf)) +		return -ENOMEM; + +	sz = sizeof(struct btf_type) + sizeof(struct btf_array); +	t = btf_add_type_mem(btf, sz); +	if (!t) +		return -ENOMEM; + +	t->name_off = 0; +	t->info = btf_type_info(BTF_KIND_ARRAY, 0, 0); +	t->size = 0; + +	a = btf_array(t); +	a->type = elem_type_id; +	a->index_type = index_type_id; +	a->nelems = nr_elems; + +	err = btf_add_type_idx_entry(btf, btf->hdr->type_len); +	if (err) +		return err; + +	btf->hdr->type_len += sz; +	btf->hdr->str_off += sz; +	btf->nr_types++; +	return btf->nr_types; +} + +/* generic STRUCT/UNION append function */ +static int btf_add_composite(struct btf *btf, int kind, const char *name, __u32 bytes_sz) +{ +	struct btf_type *t; +	int sz, err, name_off = 0; + +	if (btf_ensure_modifiable(btf)) +		return -ENOMEM; + +	sz = sizeof(struct btf_type); +	t = btf_add_type_mem(btf, sz); +	if (!t) +		return -ENOMEM; + +	if (name && name[0]) { +		name_off = btf__add_str(btf, name); +		if (name_off < 0) +			return name_off; +	} + +	/* start out with vlen=0 and no kflag; this will be adjusted when +	 * adding each member +	 */ +	t->name_off = name_off; +	t->info = btf_type_info(kind, 0, 0); +	t->size = bytes_sz; + +	err = btf_add_type_idx_entry(btf, btf->hdr->type_len); +	if (err) +		return err; + +	btf->hdr->type_len += sz; +	btf->hdr->str_off += sz; +	btf->nr_types++; +	return btf->nr_types; +} + +/* + * Append new BTF_KIND_STRUCT type with: + *   - *name* - name of the struct, can be NULL or empty for anonymous structs; + *   - *byte_sz* - size of the struct, in bytes; + * + * Struct initially has no fields in it. Fields can be added by + * btf__add_field() right after btf__add_struct() succeeds.  + * + * Returns: + *   - >0, type ID of newly added BTF type; + *   - <0, on error. + */ +int btf__add_struct(struct btf *btf, const char *name, __u32 byte_sz) +{ +	return btf_add_composite(btf, BTF_KIND_STRUCT, name, byte_sz); +} + +/* + * Append new BTF_KIND_UNION type with: + *   - *name* - name of the union, can be NULL or empty for anonymous union; + *   - *byte_sz* - size of the union, in bytes; + * + * Union initially has no fields in it. Fields can be added by + * btf__add_field() right after btf__add_union() succeeds. All fields + * should have *bit_offset* of 0. + * + * Returns: + *   - >0, type ID of newly added BTF type; + *   - <0, on error. + */ +int btf__add_union(struct btf *btf, const char *name, __u32 byte_sz) +{ +	return btf_add_composite(btf, BTF_KIND_UNION, name, byte_sz); +} + +/* + * Append new field for the current STRUCT/UNION type with: + *   - *name* - name of the field, can be NULL or empty for anonymous field; + *   - *type_id* - type ID for the type describing field type; + *   - *bit_offset* - bit offset of the start of the field within struct/union; + *   - *bit_size* - bit size of a bitfield, 0 for non-bitfield fields; + * Returns: + *   -  0, on success; + *   - <0, on error. 
+ */ +int btf__add_field(struct btf *btf, const char *name, int type_id, +		   __u32 bit_offset, __u32 bit_size) +{ +	struct btf_type *t; +	struct btf_member *m; +	bool is_bitfield; +	int sz, name_off = 0; + +	/* last type should be union/struct */ +	if (btf->nr_types == 0) +		return -EINVAL; +	t = btf_type_by_id(btf, btf->nr_types); +	if (!btf_is_composite(t)) +		return -EINVAL; + +	if (validate_type_id(type_id)) +		return -EINVAL; +	/* best-effort bit field offset/size enforcement */ +	is_bitfield = bit_size || (bit_offset % 8 != 0); +	if (is_bitfield && (bit_size == 0 || bit_size > 255 || bit_offset > 0xffffff)) +		return -EINVAL; + +	/* only offset 0 is allowed for unions */ +	if (btf_is_union(t) && bit_offset) +		return -EINVAL; + +	/* decompose and invalidate raw data */ +	if (btf_ensure_modifiable(btf)) +		return -ENOMEM; + +	sz = sizeof(struct btf_member); +	m = btf_add_type_mem(btf, sz); +	if (!m) +		return -ENOMEM; + +	if (name && name[0]) { +		name_off = btf__add_str(btf, name); +		if (name_off < 0) +			return name_off; +	} + +	m->name_off = name_off; +	m->type = type_id; +	m->offset = bit_offset | (bit_size << 24); + +	/* btf_add_type_mem can invalidate t pointer */ +	t = btf_type_by_id(btf, btf->nr_types); +	/* update parent type's vlen and kflag */ +	t->info = btf_type_info(btf_kind(t), btf_vlen(t) + 1, is_bitfield || btf_kflag(t)); + +	btf->hdr->type_len += sz; +	btf->hdr->str_off += sz; +	return 0; +} + +/* + * Append new BTF_KIND_ENUM type with: + *   - *name* - name of the enum, can be NULL or empty for anonymous enums; + *   - *byte_sz* - size of the enum, in bytes. + * + * Enum initially has no enum values in it (and corresponds to enum forward + * declaration). Enumerator values can be added by btf__add_enum_value() + * immediately after btf__add_enum() succeeds. + * + * Returns: + *   - >0, type ID of newly added BTF type; + *   - <0, on error. + */ +int btf__add_enum(struct btf *btf, const char *name, __u32 byte_sz) +{ +	struct btf_type *t; +	int sz, err, name_off = 0; + +	/* byte_sz must be power of 2 */ +	if (!byte_sz || (byte_sz & (byte_sz - 1)) || byte_sz > 8) +		return -EINVAL; + +	if (btf_ensure_modifiable(btf)) +		return -ENOMEM; + +	sz = sizeof(struct btf_type); +	t = btf_add_type_mem(btf, sz); +	if (!t) +		return -ENOMEM; + +	if (name && name[0]) { +		name_off = btf__add_str(btf, name); +		if (name_off < 0) +			return name_off; +	} + +	/* start out with vlen=0; it will be adjusted when adding enum values */ +	t->name_off = name_off; +	t->info = btf_type_info(BTF_KIND_ENUM, 0, 0); +	t->size = byte_sz; + +	err = btf_add_type_idx_entry(btf, btf->hdr->type_len); +	if (err) +		return err; + +	btf->hdr->type_len += sz; +	btf->hdr->str_off += sz; +	btf->nr_types++; +	return btf->nr_types; +} + +/* + * Append new enum value for the current ENUM type with: + *   - *name* - name of the enumerator value, can't be NULL or empty; + *   - *value* - integer value corresponding to enum value *name*; + * Returns: + *   -  0, on success; + *   - <0, on error. 
+ */
+int btf__add_enum_value(struct btf *btf, const char *name, __s64 value)
+{
+	struct btf_type *t;
+	struct btf_enum *v;
+	int sz, name_off;
+
+	/* last type should be BTF_KIND_ENUM */
+	if (btf->nr_types == 0)
+		return -EINVAL;
+	t = btf_type_by_id(btf, btf->nr_types);
+	if (!btf_is_enum(t))
+		return -EINVAL;
+
+	/* non-empty name */
+	if (!name || !name[0])
+		return -EINVAL;
+	if (value < INT_MIN || value > UINT_MAX)
+		return -E2BIG;
+
+	/* decompose and invalidate raw data */
+	if (btf_ensure_modifiable(btf))
+		return -ENOMEM;
+
+	sz = sizeof(struct btf_enum);
+	v = btf_add_type_mem(btf, sz);
+	if (!v)
+		return -ENOMEM;
+
+	name_off = btf__add_str(btf, name);
+	if (name_off < 0)
+		return name_off;
+
+	v->name_off = name_off;
+	v->val = value;
+
+	/* update parent type's vlen */
+	t = btf_type_by_id(btf, btf->nr_types);
+	btf_type_inc_vlen(t);
+
+	btf->hdr->type_len += sz;
+	btf->hdr->str_off += sz;
+	return 0;
+}
+
+/*
+ * Append new BTF_KIND_FWD type with:
+ *   - *name*, non-empty/non-NULL name;
+ *   - *fwd_kind*, kind of forward declaration, one of BTF_FWD_STRUCT,
+ *     BTF_FWD_UNION, or BTF_FWD_ENUM;
+ * Returns:
+ *   - >0, type ID of newly added BTF type;
+ *   - <0, on error.
+ */
+int btf__add_fwd(struct btf *btf, const char *name, enum btf_fwd_kind fwd_kind)
+{
+	if (!name || !name[0])
+		return -EINVAL;
+
+	switch (fwd_kind) {
+	case BTF_FWD_STRUCT:
+	case BTF_FWD_UNION: {
+		struct btf_type *t;
+		int id;
+
+		id = btf_add_ref_kind(btf, BTF_KIND_FWD, name, 0);
+		if (id <= 0)
+			return id;
+		t = btf_type_by_id(btf, id);
+		t->info = btf_type_info(BTF_KIND_FWD, 0, fwd_kind == BTF_FWD_UNION);
+		return id;
+	}
+	case BTF_FWD_ENUM:
+		/* enum forward in BTF currently is just an enum with no enum
+		 * values; we also assume a standard 4-byte size for it
+		 */
+		return btf__add_enum(btf, name, sizeof(int));
+	default:
+		return -EINVAL;
+	}
+}
+
+/*
+ * Append new BTF_KIND_TYPEDEF type with:
+ *   - *name*, non-empty/non-NULL name;
+ *   - *ref_type_id* - referenced type ID, it might not exist yet;
+ * Returns:
+ *   - >0, type ID of newly added BTF type;
+ *   - <0, on error.
+ */
+int btf__add_typedef(struct btf *btf, const char *name, int ref_type_id)
+{
+	if (!name || !name[0])
+		return -EINVAL;
+
+	return btf_add_ref_kind(btf, BTF_KIND_TYPEDEF, name, ref_type_id);
+}
+
+/*
+ * Append new BTF_KIND_VOLATILE type with:
+ *   - *ref_type_id* - referenced type ID, it might not exist yet;
+ * Returns:
+ *   - >0, type ID of newly added BTF type;
+ *   - <0, on error.
+ */
+int btf__add_volatile(struct btf *btf, int ref_type_id)
+{
+	return btf_add_ref_kind(btf, BTF_KIND_VOLATILE, NULL, ref_type_id);
+}
+
+/*
+ * Append new BTF_KIND_CONST type with:
+ *   - *ref_type_id* - referenced type ID, it might not exist yet;
+ * Returns:
+ *   - >0, type ID of newly added BTF type;
+ *   - <0, on error.
+ */
+int btf__add_const(struct btf *btf, int ref_type_id)
+{
+	return btf_add_ref_kind(btf, BTF_KIND_CONST, NULL, ref_type_id);
+}
+
+/*
+ * Append new BTF_KIND_RESTRICT type with:
+ *   - *ref_type_id* - referenced type ID, it might not exist yet;
+ * Returns:
+ *   - >0, type ID of newly added BTF type;
+ *   - <0, on error.
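+ *
+ * As with the other type modifiers above, these calls can be chained; a
+ * sketch (assuming *int_id* is a valid type ID) producing 'const volatile
+ * int':
+ *
+ *   int v_id = btf__add_volatile(btf, int_id);
+ *   int cv_id = btf__add_const(btf, v_id);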
+ */
+int btf__add_restrict(struct btf *btf, int ref_type_id)
+{
+	return btf_add_ref_kind(btf, BTF_KIND_RESTRICT, NULL, ref_type_id);
+}
+
+/*
+ * Append new BTF_KIND_FUNC type with:
+ *   - *name*, non-empty/non-NULL name;
+ *   - *linkage* - function linkage, one of BTF_FUNC_STATIC,
+ *     BTF_FUNC_GLOBAL, or BTF_FUNC_EXTERN;
+ *   - *proto_type_id* - FUNC_PROTO's type ID, it might not exist yet;
+ * Returns:
+ *   - >0, type ID of newly added BTF type;
+ *   - <0, on error.
+ */
+int btf__add_func(struct btf *btf, const char *name,
+		  enum btf_func_linkage linkage, int proto_type_id)
+{
+	int id;
+
+	if (!name || !name[0])
+		return -EINVAL;
+	if (linkage != BTF_FUNC_STATIC && linkage != BTF_FUNC_GLOBAL &&
+	    linkage != BTF_FUNC_EXTERN)
+		return -EINVAL;
+
+	id = btf_add_ref_kind(btf, BTF_KIND_FUNC, name, proto_type_id);
+	if (id > 0) {
+		struct btf_type *t = btf_type_by_id(btf, id);
+
+		t->info = btf_type_info(BTF_KIND_FUNC, linkage, 0);
+	}
+	return id;
+}
+
+/*
+ * Append new BTF_KIND_FUNC_PROTO with:
+ *   - *ret_type_id* - type ID for return result of a function.
+ *
+ * Function prototype initially has no arguments, but they can be added by
+ * btf__add_func_param() one by one, immediately after
+ * btf__add_func_proto() succeeds.
+ *
+ * Returns:
+ *   - >0, type ID of newly added BTF type;
+ *   - <0, on error.
+ */
+int btf__add_func_proto(struct btf *btf, int ret_type_id)
+{
+	struct btf_type *t;
+	int sz, err;
+
+	if (validate_type_id(ret_type_id))
+		return -EINVAL;
+
+	if (btf_ensure_modifiable(btf))
+		return -ENOMEM;
+
+	sz = sizeof(struct btf_type);
+	t = btf_add_type_mem(btf, sz);
+	if (!t)
+		return -ENOMEM;
+
+	/* start out with vlen=0; this will be adjusted when adding function
+	 * parameters, if necessary
+	 */
+	t->name_off = 0;
+	t->info = btf_type_info(BTF_KIND_FUNC_PROTO, 0, 0);
+	t->type = ret_type_id;
+
+	err = btf_add_type_idx_entry(btf, btf->hdr->type_len);
+	if (err)
+		return err;
+
+	btf->hdr->type_len += sz;
+	btf->hdr->str_off += sz;
+	btf->nr_types++;
+	return btf->nr_types;
+}
+
+/*
+ * Append new function parameter for current FUNC_PROTO type with:
+ *   - *name* - parameter name, can be NULL or empty;
+ *   - *type_id* - type ID describing the type of the parameter.
+ * Returns:
+ *   -  0, on success;
+ *   - <0, on error.
+ */
+int btf__add_func_param(struct btf *btf, const char *name, int type_id)
+{
+	struct btf_type *t;
+	struct btf_param *p;
+	int sz, name_off = 0;
+
+	if (validate_type_id(type_id))
+		return -EINVAL;
+
+	/* last type should be BTF_KIND_FUNC_PROTO */
+	if (btf->nr_types == 0)
+		return -EINVAL;
+	t = btf_type_by_id(btf, btf->nr_types);
+	if (!btf_is_func_proto(t))
+		return -EINVAL;
+
+	/* decompose and invalidate raw data */
+	if (btf_ensure_modifiable(btf))
+		return -ENOMEM;
+
+	sz = sizeof(struct btf_param);
+	p = btf_add_type_mem(btf, sz);
+	if (!p)
+		return -ENOMEM;
+
+	if (name && name[0]) {
+		name_off = btf__add_str(btf, name);
+		if (name_off < 0)
+			return name_off;
+	}
+
+	p->name_off = name_off;
+	p->type = type_id;
+
+	/* update parent type's vlen */
+	t = btf_type_by_id(btf, btf->nr_types);
+	btf_type_inc_vlen(t);
+
+	btf->hdr->type_len += sz;
+	btf->hdr->str_off += sz;
+	return 0;
+}
+
+/*
+ * Append new BTF_KIND_VAR type with:
+ *   - *name* - non-empty/non-NULL name;
+ *   - *linkage* - variable linkage, one of BTF_VAR_STATIC,
+ *     BTF_VAR_GLOBAL_ALLOCATED, or BTF_VAR_GLOBAL_EXTERN;
+ *   - *type_id* - type ID of the type describing the type of the variable.
+ * Returns:
+ *   - >0, type ID of newly added BTF type;
+ *   - <0, on error.
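+ *
+ * Hypothetical example (*int_id* assumed to be a valid type ID, error
+ * handling omitted):
+ *
+ *   int var_id = btf__add_var(btf, "my_var", BTF_VAR_GLOBAL_ALLOCATED, int_id);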
+ */
+int btf__add_var(struct btf *btf, const char *name, int linkage, int type_id)
+{
+	struct btf_type *t;
+	struct btf_var *v;
+	int sz, err, name_off;
+
+	/* non-empty name */
+	if (!name || !name[0])
+		return -EINVAL;
+	if (linkage != BTF_VAR_STATIC && linkage != BTF_VAR_GLOBAL_ALLOCATED &&
+	    linkage != BTF_VAR_GLOBAL_EXTERN)
+		return -EINVAL;
+	if (validate_type_id(type_id))
+		return -EINVAL;
+
+	/* deconstruct BTF, if necessary, and invalidate raw_data */
+	if (btf_ensure_modifiable(btf))
+		return -ENOMEM;
+
+	sz = sizeof(struct btf_type) + sizeof(struct btf_var);
+	t = btf_add_type_mem(btf, sz);
+	if (!t)
+		return -ENOMEM;
+
+	name_off = btf__add_str(btf, name);
+	if (name_off < 0)
+		return name_off;
+
+	t->name_off = name_off;
+	t->info = btf_type_info(BTF_KIND_VAR, 0, 0);
+	t->type = type_id;
+
+	v = btf_var(t);
+	v->linkage = linkage;
+
+	err = btf_add_type_idx_entry(btf, btf->hdr->type_len);
+	if (err)
+		return err;
+
+	btf->hdr->type_len += sz;
+	btf->hdr->str_off += sz;
+	btf->nr_types++;
+	return btf->nr_types;
+}
+
+/*
+ * Append new BTF_KIND_DATASEC type with:
+ *   - *name* - non-empty/non-NULL name;
+ *   - *byte_sz* - data section size, in bytes.
+ *
+ * Data section is initially empty. Variable info entries can be added with
+ * btf__add_datasec_var_info() calls, after btf__add_datasec() succeeds.
+ *
+ * Returns:
+ *   - >0, type ID of newly added BTF type;
+ *   - <0, on error.
+ */
+int btf__add_datasec(struct btf *btf, const char *name, __u32 byte_sz)
+{
+	struct btf_type *t;
+	int sz, err, name_off;
+
+	/* non-empty name */
+	if (!name || !name[0])
+		return -EINVAL;
+
+	if (btf_ensure_modifiable(btf))
+		return -ENOMEM;
+
+	sz = sizeof(struct btf_type);
+	t = btf_add_type_mem(btf, sz);
+	if (!t)
+		return -ENOMEM;
+
+	name_off = btf__add_str(btf, name);
+	if (name_off < 0)
+		return name_off;
+
+	/* start with vlen=0, which will be updated as var_secinfos are added */
+	t->name_off = name_off;
+	t->info = btf_type_info(BTF_KIND_DATASEC, 0, 0);
+	t->size = byte_sz;
+
+	err = btf_add_type_idx_entry(btf, btf->hdr->type_len);
+	if (err)
+		return err;
+
+	btf->hdr->type_len += sz;
+	btf->hdr->str_off += sz;
+	btf->nr_types++;
+	return btf->nr_types;
+}
+
+/*
+ * Append new data section variable information entry for current DATASEC type:
+ *   - *var_type_id* - type ID, describing type of the variable;
+ *   - *offset* - variable offset within data section, in bytes;
+ *   - *byte_sz* - variable size, in bytes.
+ *
+ * Returns:
+ *   -  0, on success;
+ *   - <0, on error.
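+ *
+ * Illustrative sketch, pairing a DATASEC with the *var_id* from the
+ * btf__add_var() example above (sizes and offsets are made up):
+ *
+ *   int sec_id = btf__add_datasec(btf, ".data", 4);
+ *   btf__add_datasec_var_info(btf, var_id, 0, 4);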
+ */
+int btf__add_datasec_var_info(struct btf *btf, int var_type_id, __u32 offset, __u32 byte_sz)
+{
+	struct btf_type *t;
+	struct btf_var_secinfo *v;
+	int sz;
+
+	/* last type should be BTF_KIND_DATASEC */
+	if (btf->nr_types == 0)
+		return -EINVAL;
+	t = btf_type_by_id(btf, btf->nr_types);
+	if (!btf_is_datasec(t))
+		return -EINVAL;
+
+	if (validate_type_id(var_type_id))
+		return -EINVAL;
+
+	/* decompose and invalidate raw data */
+	if (btf_ensure_modifiable(btf))
+		return -ENOMEM;
+
+	sz = sizeof(struct btf_var_secinfo);
+	v = btf_add_type_mem(btf, sz);
+	if (!v)
+		return -ENOMEM;
+
+	v->type = var_type_id;
+	v->offset = offset;
+	v->size = byte_sz;
+
+	/* update parent type's vlen */
+	t = btf_type_by_id(btf, btf->nr_types);
+	btf_type_inc_vlen(t);
+
+	btf->hdr->type_len += sz;
+	btf->hdr->str_off += sz;
+	return 0;
+}
+
 struct btf_ext_sec_setup_param {
 	__u32 off;
 	__u32 len;
@@ -1137,14 +2448,14 @@ static int btf_ext_setup_line_info(struct btf_ext *btf_ext)
 	return btf_ext_setup_info(btf_ext, &param);
 }
 
-static int btf_ext_setup_field_reloc(struct btf_ext *btf_ext)
+static int btf_ext_setup_core_relos(struct btf_ext *btf_ext)
 {
 	struct btf_ext_sec_setup_param param = {
-		.off = btf_ext->hdr->field_reloc_off,
-		.len = btf_ext->hdr->field_reloc_len,
-		.min_rec_size = sizeof(struct bpf_field_reloc),
-		.ext_info = &btf_ext->field_reloc_info,
-		.desc = "field_reloc",
+		.off = btf_ext->hdr->core_relo_off,
+		.len = btf_ext->hdr->core_relo_len,
+		.min_rec_size = sizeof(struct bpf_core_relo),
+		.ext_info = &btf_ext->core_relo_info,
+		.desc = "core_relo",
 	};
 
 	return btf_ext_setup_info(btf_ext, &param);
@@ -1160,7 +2471,10 @@ static int btf_ext_parse_hdr(__u8 *data, __u32 data_size)
 		return -EINVAL;
 	}
 
-	if (hdr->magic != BTF_MAGIC) {
+	if (hdr->magic == bswap_16(BTF_MAGIC)) {
+		pr_warn("BTF.ext in non-native endianness is not supported\n");
+		return -ENOTSUP;
+	} else if (hdr->magic != BTF_MAGIC) {
 		pr_debug("Invalid BTF.ext magic:%x\n", hdr->magic);
 		return -EINVAL;
 	}
@@ -1223,10 +2537,9 @@ struct btf_ext *btf_ext__new(__u8 *data, __u32 size)
 	if (err)
 		goto done;
 
-	if (btf_ext->hdr->hdr_len <
-	    offsetofend(struct btf_ext_header, field_reloc_len))
+	if (btf_ext->hdr->hdr_len < offsetofend(struct btf_ext_header, core_relo_len))
 		goto done;
-	err = btf_ext_setup_field_reloc(btf_ext);
+	err = btf_ext_setup_core_relos(btf_ext);
 	if (err)
 		goto done;
 
@@ -1481,6 +2794,9 @@ int btf__dedup(struct btf *btf, struct btf_ext *btf_ext,
 		return -EINVAL;
 	}
 
+	if (btf_ensure_modifiable(btf))
+		return -ENOMEM;
+
 	err = btf_dedup_strings(d);
 	if (err < 0) {
 		pr_debug("btf_dedup_strings failed:%d\n", err);
@@ -1581,7 +2897,7 @@ static int btf_dedup_hypot_map_add(struct btf_dedup *d,
 		__u32 *new_list;
 
 		d->hypot_cap += max((size_t)16, d->hypot_cap / 2);
-		new_list = realloc(d->hypot_list, sizeof(__u32) * d->hypot_cap);
+		new_list = libbpf_reallocarray(d->hypot_list, d->hypot_cap, sizeof(__u32));
 		if (!new_list)
 			return -ENOMEM;
 		d->hypot_list = new_list;
@@ -1665,7 +2981,7 @@ static struct btf_dedup *btf_dedup_new(struct btf *btf, struct btf_ext *btf_ext,
 	/* special BTF "void" type is made canonical immediately */
 	d->map[0] = 0;
 	for (i = 1; i <= btf->nr_types; i++) {
-		struct btf_type *t = d->btf->types[i];
+		struct btf_type *t = btf_type_by_id(d->btf, i);
 
 		/* VAR and DATASEC are never deduped and are self-canonical */
 		if (btf_is_var(t) || btf_is_datasec(t))
@@ -1704,7 +3020,7 @@ static int btf_for_each_str_off(struct btf_dedup *d,
str_off_fn_t fn, void *ctx)  	struct btf_type *t;  	for (i = 1; i <= d->btf->nr_types; i++) { -		t = d->btf->types[i]; +		t = btf_type_by_id(d->btf, i);  		r = fn(&t->name_off, ctx);  		if (r)  			return r; @@ -1858,8 +3174,7 @@ static int btf_str_remap_offset(__u32 *str_off_ptr, void *ctx)   */  static int btf_dedup_strings(struct btf_dedup *d)  { -	const struct btf_header *hdr = d->btf->hdr; -	char *start = (char *)d->btf->nohdr_data + hdr->str_off; +	char *start = d->btf->strs_data;  	char *end = start + d->btf->hdr->str_len;  	char *p = start, *tmp_strs = NULL;  	struct btf_str_ptrs strs = { @@ -1871,14 +3186,16 @@ static int btf_dedup_strings(struct btf_dedup *d)  	int i, j, err = 0, grp_idx;  	bool grp_used; +	if (d->btf->strs_deduped) +		return 0; +  	/* build index of all strings */  	while (p < end) {  		if (strs.cnt + 1 > strs.cap) {  			struct btf_str_ptr *new_ptrs;  			strs.cap += max(strs.cnt / 2, 16U); -			new_ptrs = realloc(strs.ptrs, -					   sizeof(strs.ptrs[0]) * strs.cap); +			new_ptrs = libbpf_reallocarray(strs.ptrs, strs.cap, sizeof(strs.ptrs[0]));  			if (!new_ptrs) {  				err = -ENOMEM;  				goto done; @@ -1964,6 +3281,7 @@ static int btf_dedup_strings(struct btf_dedup *d)  		goto done;  	d->btf->hdr->str_len = end - start; +	d->btf->strs_deduped = true;  done:  	free(tmp_strs); @@ -2240,7 +3558,7 @@ static bool btf_compat_fnproto(struct btf_type *t1, struct btf_type *t2)   */  static int btf_dedup_prim_type(struct btf_dedup *d, __u32 type_id)  { -	struct btf_type *t = d->btf->types[type_id]; +	struct btf_type *t = btf_type_by_id(d->btf, type_id);  	struct hashmap_entry *hash_entry;  	struct btf_type *cand;  	/* if we don't find equivalent type, then we are canonical */ @@ -2267,7 +3585,7 @@ static int btf_dedup_prim_type(struct btf_dedup *d, __u32 type_id)  		h = btf_hash_int(t);  		for_each_dedup_cand(d, hash_entry, h) {  			cand_id = (__u32)(long)hash_entry->value; -			cand = d->btf->types[cand_id]; +			cand = btf_type_by_id(d->btf, cand_id);  			if (btf_equal_int(t, cand)) {  				new_id = cand_id;  				break; @@ -2279,7 +3597,7 @@ static int btf_dedup_prim_type(struct btf_dedup *d, __u32 type_id)  		h = btf_hash_enum(t);  		for_each_dedup_cand(d, hash_entry, h) {  			cand_id = (__u32)(long)hash_entry->value; -			cand = d->btf->types[cand_id]; +			cand = btf_type_by_id(d->btf, cand_id);  			if (btf_equal_enum(t, cand)) {  				new_id = cand_id;  				break; @@ -2302,7 +3620,7 @@ static int btf_dedup_prim_type(struct btf_dedup *d, __u32 type_id)  		h = btf_hash_common(t);  		for_each_dedup_cand(d, hash_entry, h) {  			cand_id = (__u32)(long)hash_entry->value; -			cand = d->btf->types[cand_id]; +			cand = btf_type_by_id(d->btf, cand_id);  			if (btf_equal_common(t, cand)) {  				new_id = cand_id;  				break; @@ -2361,13 +3679,13 @@ static uint32_t resolve_fwd_id(struct btf_dedup *d, uint32_t type_id)  {  	__u32 orig_type_id = type_id; -	if (!btf_is_fwd(d->btf->types[type_id])) +	if (!btf_is_fwd(btf__type_by_id(d->btf, type_id)))  		return type_id;  	while (is_type_mapped(d, type_id) && d->map[type_id] != type_id)  		type_id = d->map[type_id]; -	if (!btf_is_fwd(d->btf->types[type_id])) +	if (!btf_is_fwd(btf__type_by_id(d->btf, type_id)))  		return type_id;  	return orig_type_id; @@ -2495,8 +3813,8 @@ static int btf_dedup_is_equiv(struct btf_dedup *d, __u32 cand_id,  	if (btf_dedup_hypot_map_add(d, canon_id, cand_id))  		return -ENOMEM; -	cand_type = d->btf->types[cand_id]; -	canon_type = d->btf->types[canon_id]; +	cand_type = btf_type_by_id(d->btf, cand_id); +	
canon_type = btf_type_by_id(d->btf, canon_id);  	cand_kind = btf_kind(cand_type);  	canon_kind = btf_kind(canon_type); @@ -2647,8 +3965,8 @@ static void btf_dedup_merge_hypot_map(struct btf_dedup *d)  		targ_type_id = d->hypot_map[cand_type_id];  		t_id = resolve_type_id(d, targ_type_id);  		c_id = resolve_type_id(d, cand_type_id); -		t_kind = btf_kind(d->btf->types[t_id]); -		c_kind = btf_kind(d->btf->types[c_id]); +		t_kind = btf_kind(btf__type_by_id(d->btf, t_id)); +		c_kind = btf_kind(btf__type_by_id(d->btf, c_id));  		/*  		 * Resolve FWD into STRUCT/UNION.  		 * It's ok to resolve FWD into STRUCT/UNION that's not yet @@ -2716,7 +4034,7 @@ static int btf_dedup_struct_type(struct btf_dedup *d, __u32 type_id)  	if (d->map[type_id] <= BTF_MAX_NR_TYPES)  		return 0; -	t = d->btf->types[type_id]; +	t = btf_type_by_id(d->btf, type_id);  	kind = btf_kind(t);  	if (kind != BTF_KIND_STRUCT && kind != BTF_KIND_UNION) @@ -2737,7 +4055,7 @@ static int btf_dedup_struct_type(struct btf_dedup *d, __u32 type_id)  		 * creating a loop (FWD -> STRUCT and STRUCT -> FWD), because  		 * FWD and compatible STRUCT/UNION are considered equivalent.  		 */ -		cand_type = d->btf->types[cand_id]; +		cand_type = btf_type_by_id(d->btf, cand_id);  		if (!btf_shallow_equal_struct(t, cand_type))  			continue; @@ -2809,7 +4127,7 @@ static int btf_dedup_ref_type(struct btf_dedup *d, __u32 type_id)  	if (d->map[type_id] <= BTF_MAX_NR_TYPES)  		return resolve_type_id(d, type_id); -	t = d->btf->types[type_id]; +	t = btf_type_by_id(d->btf, type_id);  	d->map[type_id] = BTF_IN_PROGRESS_ID;  	switch (btf_kind(t)) { @@ -2827,7 +4145,7 @@ static int btf_dedup_ref_type(struct btf_dedup *d, __u32 type_id)  		h = btf_hash_common(t);  		for_each_dedup_cand(d, hash_entry, h) {  			cand_id = (__u32)(long)hash_entry->value; -			cand = d->btf->types[cand_id]; +			cand = btf_type_by_id(d->btf, cand_id);  			if (btf_equal_common(t, cand)) {  				new_id = cand_id;  				break; @@ -2851,7 +4169,7 @@ static int btf_dedup_ref_type(struct btf_dedup *d, __u32 type_id)  		h = btf_hash_array(t);  		for_each_dedup_cand(d, hash_entry, h) {  			cand_id = (__u32)(long)hash_entry->value; -			cand = d->btf->types[cand_id]; +			cand = btf_type_by_id(d->btf, cand_id);  			if (btf_equal_array(t, cand)) {  				new_id = cand_id;  				break; @@ -2883,7 +4201,7 @@ static int btf_dedup_ref_type(struct btf_dedup *d, __u32 type_id)  		h = btf_hash_fnproto(t);  		for_each_dedup_cand(d, hash_entry, h) {  			cand_id = (__u32)(long)hash_entry->value; -			cand = d->btf->types[cand_id]; +			cand = btf_type_by_id(d->btf, cand_id);  			if (btf_equal_fnproto(t, cand)) {  				new_id = cand_id;  				break; @@ -2931,9 +4249,9 @@ static int btf_dedup_ref_types(struct btf_dedup *d)   */  static int btf_dedup_compact_types(struct btf_dedup *d)  { -	struct btf_type **new_types; +	__u32 *new_offs;  	__u32 next_type_id = 1; -	char *types_start, *p; +	void *p;  	int i, len;  	/* we are going to reuse hypot_map to store compaction remapping */ @@ -2941,41 +4259,34 @@ static int btf_dedup_compact_types(struct btf_dedup *d)  	for (i = 1; i <= d->btf->nr_types; i++)  		d->hypot_map[i] = BTF_UNPROCESSED_ID; -	types_start = d->btf->nohdr_data + d->btf->hdr->type_off; -	p = types_start; +	p = d->btf->types_data;  	for (i = 1; i <= d->btf->nr_types; i++) {  		if (d->map[i] != i)  			continue; -		len = btf_type_size(d->btf->types[i]); +		len = btf_type_size(btf__type_by_id(d->btf, i));  		if (len < 0)  			return len; -		memmove(p, d->btf->types[i], len); +		memmove(p, 
btf__type_by_id(d->btf, i), len);  		d->hypot_map[i] = next_type_id; -		d->btf->types[next_type_id] = (struct btf_type *)p; +		d->btf->type_offs[next_type_id] = p - d->btf->types_data;  		p += len;  		next_type_id++;  	}  	/* shrink struct btf's internal types index and update btf_header */  	d->btf->nr_types = next_type_id - 1; -	d->btf->types_size = d->btf->nr_types; -	d->btf->hdr->type_len = p - types_start; -	new_types = realloc(d->btf->types, -			    (1 + d->btf->nr_types) * sizeof(struct btf_type *)); -	if (!new_types) +	d->btf->type_offs_cap = d->btf->nr_types + 1; +	d->btf->hdr->type_len = p - d->btf->types_data; +	new_offs = libbpf_reallocarray(d->btf->type_offs, d->btf->type_offs_cap, +				       sizeof(*new_offs)); +	if (!new_offs)  		return -ENOMEM; -	d->btf->types = new_types; - -	/* make sure string section follows type information without gaps */ -	d->btf->hdr->str_off = p - (char *)d->btf->nohdr_data; -	memmove(p, d->btf->strings, d->btf->hdr->str_len); -	d->btf->strings = p; -	p += d->btf->hdr->str_len; - -	d->btf->data_size = p - (char *)d->btf->data; +	d->btf->type_offs = new_offs; +	d->btf->hdr->str_off = d->btf->hdr->type_len; +	d->btf->raw_size = d->btf->hdr->hdr_len + d->btf->hdr->type_len + d->btf->hdr->str_len;  	return 0;  } @@ -3008,7 +4319,7 @@ static int btf_dedup_remap_type_id(struct btf_dedup *d, __u32 type_id)   */  static int btf_dedup_remap_type(struct btf_dedup *d, __u32 type_id)  { -	struct btf_type *t = d->btf->types[type_id]; +	struct btf_type *t = btf_type_by_id(d->btf, type_id);  	int i, r;  	switch (btf_kind(t)) { diff --git a/tools/lib/bpf/btf.h b/tools/lib/bpf/btf.h index 1ca14448df4c..57247240a20a 100644 --- a/tools/lib/bpf/btf.h +++ b/tools/lib/bpf/btf.h @@ -5,6 +5,7 @@  #define __LIBBPF_BTF_H  #include <stdarg.h> +#include <stdbool.h>  #include <linux/btf.h>  #include <linux/types.h> @@ -24,46 +25,14 @@ struct btf_type;  struct bpf_object; -/* - * The .BTF.ext ELF section layout defined as - *   struct btf_ext_header - *   func_info subsection - * - * The func_info subsection layout: - *   record size for struct bpf_func_info in the func_info subsection - *   struct btf_sec_func_info for section #1 - *   a list of bpf_func_info records for section #1 - *     where struct bpf_func_info mimics one in include/uapi/linux/bpf.h - *     but may not be identical - *   struct btf_sec_func_info for section #2 - *   a list of bpf_func_info records for section #2 - *   ...... - * - * Note that the bpf_func_info record size in .BTF.ext may not - * be the same as the one defined in include/uapi/linux/bpf.h. - * The loader should ensure that record_size meets minimum - * requirement and pass the record as is to the kernel. The - * kernel will handle the func_info properly based on its contents. 
- */ -struct btf_ext_header { -	__u16	magic; -	__u8	version; -	__u8	flags; -	__u32	hdr_len; - -	/* All offsets are in bytes relative to the end of this header */ -	__u32	func_info_off; -	__u32	func_info_len; -	__u32	line_info_off; -	__u32	line_info_len; - -	/* optional part of .BTF.ext header */ -	__u32	field_reloc_off; -	__u32	field_reloc_len; +enum btf_endianness { +	BTF_LITTLE_ENDIAN = 0, +	BTF_BIG_ENDIAN = 1,  };  LIBBPF_API void btf__free(struct btf *btf);  LIBBPF_API struct btf *btf__new(const void *data, __u32 size); +LIBBPF_API struct btf *btf__new_empty(void);  LIBBPF_API struct btf *btf__parse(const char *path, struct btf_ext **btf_ext);  LIBBPF_API struct btf *btf__parse_elf(const char *path, struct btf_ext **btf_ext);  LIBBPF_API struct btf *btf__parse_raw(const char *path); @@ -78,6 +47,8 @@ LIBBPF_API const struct btf_type *btf__type_by_id(const struct btf *btf,  						  __u32 id);  LIBBPF_API size_t btf__pointer_size(const struct btf *btf);  LIBBPF_API int btf__set_pointer_size(struct btf *btf, size_t ptr_sz); +LIBBPF_API enum btf_endianness btf__endianness(const struct btf *btf); +LIBBPF_API int btf__set_endianness(struct btf *btf, enum btf_endianness endian);  LIBBPF_API __s64 btf__resolve_size(const struct btf *btf, __u32 type_id);  LIBBPF_API int btf__resolve_type(const struct btf *btf, __u32 type_id);  LIBBPF_API int btf__align_of(const struct btf *btf, __u32 id); @@ -85,6 +56,7 @@ LIBBPF_API int btf__fd(const struct btf *btf);  LIBBPF_API void btf__set_fd(struct btf *btf, int fd);  LIBBPF_API const void *btf__get_raw_data(const struct btf *btf, __u32 *size);  LIBBPF_API const char *btf__name_by_offset(const struct btf *btf, __u32 offset); +LIBBPF_API const char *btf__str_by_offset(const struct btf *btf, __u32 offset);  LIBBPF_API int btf__get_from_id(__u32 id, struct btf **btf);  LIBBPF_API int btf__get_map_kv_tids(const struct btf *btf, const char *map_name,  				    __u32 expected_key_size, @@ -95,19 +67,62 @@ LIBBPF_API struct btf_ext *btf_ext__new(__u8 *data, __u32 size);  LIBBPF_API void btf_ext__free(struct btf_ext *btf_ext);  LIBBPF_API const void *btf_ext__get_raw_data(const struct btf_ext *btf_ext,  					     __u32 *size); -LIBBPF_API int btf_ext__reloc_func_info(const struct btf *btf, -					const struct btf_ext *btf_ext, -					const char *sec_name, __u32 insns_cnt, -					void **func_info, __u32 *cnt); -LIBBPF_API int btf_ext__reloc_line_info(const struct btf *btf, -					const struct btf_ext *btf_ext, -					const char *sec_name, __u32 insns_cnt, -					void **line_info, __u32 *cnt); +LIBBPF_API LIBBPF_DEPRECATED("btf_ext__reloc_func_info was never meant as a public API and has wrong assumptions embedded in it; it will be removed in the future libbpf versions") +int btf_ext__reloc_func_info(const struct btf *btf, +			     const struct btf_ext *btf_ext, +			     const char *sec_name, __u32 insns_cnt, +			     void **func_info, __u32 *cnt); +LIBBPF_API LIBBPF_DEPRECATED("btf_ext__reloc_line_info was never meant as a public API and has wrong assumptions embedded in it; it will be removed in the future libbpf versions") +int btf_ext__reloc_line_info(const struct btf *btf, +			     const struct btf_ext *btf_ext, +			     const char *sec_name, __u32 insns_cnt, +			     void **line_info, __u32 *cnt);  LIBBPF_API __u32 btf_ext__func_info_rec_size(const struct btf_ext *btf_ext);  LIBBPF_API __u32 btf_ext__line_info_rec_size(const struct btf_ext *btf_ext);  LIBBPF_API struct btf *libbpf_find_kernel_btf(void); +LIBBPF_API int btf__find_str(struct btf *btf, const char *s); 
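+/* Note on the two string APIs: btf__find_str() above only looks up an
+ * already-interned string, while btf__add_str() below also appends the
+ * string if it is missing; both are expected to return the string's offset
+ * on success and a negative error on failure.
+ */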
+LIBBPF_API int btf__add_str(struct btf *btf, const char *s); + +LIBBPF_API int btf__add_int(struct btf *btf, const char *name, size_t byte_sz, int encoding); +LIBBPF_API int btf__add_ptr(struct btf *btf, int ref_type_id); +LIBBPF_API int btf__add_array(struct btf *btf, +			      int index_type_id, int elem_type_id, __u32 nr_elems); +/* struct/union construction APIs */ +LIBBPF_API int btf__add_struct(struct btf *btf, const char *name, __u32 sz); +LIBBPF_API int btf__add_union(struct btf *btf, const char *name, __u32 sz); +LIBBPF_API int btf__add_field(struct btf *btf, const char *name, int field_type_id, +			      __u32 bit_offset, __u32 bit_size); + +/* enum construction APIs */ +LIBBPF_API int btf__add_enum(struct btf *btf, const char *name, __u32 bytes_sz); +LIBBPF_API int btf__add_enum_value(struct btf *btf, const char *name, __s64 value); + +enum btf_fwd_kind { +	BTF_FWD_STRUCT = 0, +	BTF_FWD_UNION = 1, +	BTF_FWD_ENUM = 2, +}; + +LIBBPF_API int btf__add_fwd(struct btf *btf, const char *name, enum btf_fwd_kind fwd_kind); +LIBBPF_API int btf__add_typedef(struct btf *btf, const char *name, int ref_type_id); +LIBBPF_API int btf__add_volatile(struct btf *btf, int ref_type_id); +LIBBPF_API int btf__add_const(struct btf *btf, int ref_type_id); +LIBBPF_API int btf__add_restrict(struct btf *btf, int ref_type_id); + +/* func and func_proto construction APIs */ +LIBBPF_API int btf__add_func(struct btf *btf, const char *name, +			     enum btf_func_linkage linkage, int proto_type_id); +LIBBPF_API int btf__add_func_proto(struct btf *btf, int ret_type_id); +LIBBPF_API int btf__add_func_param(struct btf *btf, const char *name, int type_id); + +/* var & datasec construction APIs */ +LIBBPF_API int btf__add_var(struct btf *btf, const char *name, int linkage, int type_id); +LIBBPF_API int btf__add_datasec(struct btf *btf, const char *name, __u32 byte_sz); +LIBBPF_API int btf__add_datasec_var_info(struct btf *btf, int var_type_id, +					 __u32 offset, __u32 byte_sz); +  struct btf_dedup_opts {  	unsigned int dedup_table_size;  	bool dont_resolve_fwds; diff --git a/tools/lib/bpf/btf_dump.c b/tools/lib/bpf/btf_dump.c index 57c00fa63932..2f9d685bd522 100644 --- a/tools/lib/bpf/btf_dump.c +++ b/tools/lib/bpf/btf_dump.c @@ -19,9 +19,6 @@  #include "libbpf.h"  #include "libbpf_internal.h" -/* make sure libbpf doesn't use kernel-only integer typedefs */ -#pragma GCC poison u8 u16 u32 u64 s8 s16 s32 s64 -  static const char PREFIXES[] = "\t\t\t\t\t\t\t\t\t\t\t\t\t";  static const size_t PREFIX_CNT = sizeof(PREFIXES) - 1; @@ -63,11 +60,14 @@ struct btf_dump {  	struct btf_dump_opts opts;  	int ptr_sz;  	bool strip_mods; +	int last_id;  	/* per-type auxiliary state */  	struct btf_dump_type_aux_state *type_states; +	size_t type_states_cap;  	/* per-type optional cached unique name, must be freed, if present */  	const char **cached_names; +	size_t cached_names_cap;  	/* topo-sorted list of dependent type definitions */  	__u32 *emit_queue; @@ -93,14 +93,7 @@ struct btf_dump {  static size_t str_hash_fn(const void *key, void *ctx)  { -	const char *s = key; -	size_t h = 0; - -	while (*s) { -		h = h * 31 + *s; -		s++; -	} -	return h; +	return str_hash(key);  }  static bool str_equal_fn(const void *a, const void *b, void *ctx) @@ -123,6 +116,7 @@ static void btf_dump_printf(const struct btf_dump *d, const char *fmt, ...)  
}  static int btf_dump_mark_referenced(struct btf_dump *d); +static int btf_dump_resize(struct btf_dump *d);  struct btf_dump *btf_dump__new(const struct btf *btf,  			       const struct btf_ext *btf_ext, @@ -154,25 +148,8 @@ struct btf_dump *btf_dump__new(const struct btf *btf,  		d->ident_names = NULL;  		goto err;  	} -	d->type_states = calloc(1 + btf__get_nr_types(d->btf), -				sizeof(d->type_states[0])); -	if (!d->type_states) { -		err = -ENOMEM; -		goto err; -	} -	d->cached_names = calloc(1 + btf__get_nr_types(d->btf), -				 sizeof(d->cached_names[0])); -	if (!d->cached_names) { -		err = -ENOMEM; -		goto err; -	} -	/* VOID is special */ -	d->type_states[0].order_state = ORDERED; -	d->type_states[0].emit_state = EMITTED; - -	/* eagerly determine referenced types for anon enums */ -	err = btf_dump_mark_referenced(d); +	err = btf_dump_resize(d);  	if (err)  		goto err; @@ -182,9 +159,38 @@ err:  	return ERR_PTR(err);  } +static int btf_dump_resize(struct btf_dump *d) +{ +	int err, last_id = btf__get_nr_types(d->btf); + +	if (last_id <= d->last_id) +		return 0; + +	if (btf_ensure_mem((void **)&d->type_states, &d->type_states_cap, +			   sizeof(*d->type_states), last_id + 1)) +		return -ENOMEM; +	if (btf_ensure_mem((void **)&d->cached_names, &d->cached_names_cap, +			   sizeof(*d->cached_names), last_id + 1)) +		return -ENOMEM; + +	if (d->last_id == 0) { +		/* VOID is special */ +		d->type_states[0].order_state = ORDERED; +		d->type_states[0].emit_state = EMITTED; +	} + +	/* eagerly determine referenced types for anon enums */ +	err = btf_dump_mark_referenced(d); +	if (err) +		return err; + +	d->last_id = last_id; +	return 0; +} +  void btf_dump__free(struct btf_dump *d)  { -	int i, cnt; +	int i;  	if (IS_ERR_OR_NULL(d))  		return; @@ -192,7 +198,7 @@ void btf_dump__free(struct btf_dump *d)  	free(d->type_states);  	if (d->cached_names) {  		/* any set cached name is owned by us and should be freed */ -		for (i = 0, cnt = btf__get_nr_types(d->btf); i <= cnt; i++) { +		for (i = 0; i <= d->last_id; i++) {  			if (d->cached_names[i])  				free((void *)d->cached_names[i]);  		} @@ -232,6 +238,10 @@ int btf_dump__dump_type(struct btf_dump *d, __u32 id)  	if (id > btf__get_nr_types(d->btf))  		return -EINVAL; +	err = btf_dump_resize(d); +	if (err) +		return err; +  	d->emit_queue_cnt = 0;  	err = btf_dump_order_type(d, id, false);  	if (err < 0) @@ -261,7 +271,7 @@ static int btf_dump_mark_referenced(struct btf_dump *d)  	const struct btf_type *t;  	__u16 vlen; -	for (i = 1; i <= n; i++) { +	for (i = d->last_id + 1; i <= n; i++) {  		t = btf__type_by_id(d->btf, i);  		vlen = btf_vlen(t); @@ -316,6 +326,7 @@ static int btf_dump_mark_referenced(struct btf_dump *d)  	}  	return 0;  } +  static int btf_dump_add_emit_queue_id(struct btf_dump *d, __u32 id)  {  	__u32 *new_queue; @@ -323,8 +334,7 @@ static int btf_dump_add_emit_queue_id(struct btf_dump *d, __u32 id)  	if (d->emit_queue_cnt >= d->emit_queue_cap) {  		new_cap = max(16, d->emit_queue_cap * 3 / 2); -		new_queue = realloc(d->emit_queue, -				    new_cap * sizeof(new_queue[0])); +		new_queue = libbpf_reallocarray(d->emit_queue, new_cap, sizeof(new_queue[0]));  		if (!new_queue)  			return -ENOMEM;  		d->emit_queue = new_queue; @@ -1003,8 +1013,7 @@ static int btf_dump_push_decl_stack_id(struct btf_dump *d, __u32 id)  	if (d->decl_stack_cnt >= d->decl_stack_cap) {  		new_cap = max(16, d->decl_stack_cap * 3 / 2); -		new_stack = realloc(d->decl_stack, -				    new_cap * sizeof(new_stack[0])); +		new_stack = libbpf_reallocarray(d->decl_stack, 
new_cap, sizeof(new_stack[0]));  		if (!new_stack)  			return -ENOMEM;  		d->decl_stack = new_stack; @@ -1061,11 +1070,15 @@ int btf_dump__emit_type_decl(struct btf_dump *d, __u32 id,  			     const struct btf_dump_emit_type_decl_opts *opts)  {  	const char *fname; -	int lvl; +	int lvl, err;  	if (!OPTS_VALID(opts, btf_dump_emit_type_decl_opts))  		return -EINVAL; +	err = btf_dump_resize(d); +	if (err) +		return -EINVAL; +  	fname = OPTS_GET(opts, field_name, "");  	lvl = OPTS_GET(opts, indent_level, 0);  	d->strip_mods = OPTS_GET(opts, strip_mods, false); diff --git a/tools/lib/bpf/hashmap.c b/tools/lib/bpf/hashmap.c index a405dad068f5..3c20b126d60d 100644 --- a/tools/lib/bpf/hashmap.c +++ b/tools/lib/bpf/hashmap.c @@ -15,6 +15,9 @@  /* make sure libbpf doesn't use kernel-only integer typedefs */  #pragma GCC poison u8 u16 u32 u64 s8 s16 s32 s64 +/* prevent accidental re-addition of reallocarray() */ +#pragma GCC poison reallocarray +  /* start with 4 buckets */  #define HASHMAP_MIN_CAP_BITS 2 diff --git a/tools/lib/bpf/hashmap.h b/tools/lib/bpf/hashmap.h index e0af36b0e5d8..d9b385fe808c 100644 --- a/tools/lib/bpf/hashmap.h +++ b/tools/lib/bpf/hashmap.h @@ -25,6 +25,18 @@ static inline size_t hash_bits(size_t h, int bits)  #endif  } +/* generic C-string hashing function */ +static inline size_t str_hash(const char *s) +{ +	size_t h = 0; + +	while (*s) { +		h = h * 31 + *s; +		s++; +	} +	return h; +} +  typedef size_t (*hashmap_hash_fn)(const void *key, void *ctx);  typedef bool (*hashmap_equal_fn)(const void *key1, const void *key2, void *ctx); diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c index e493d6048143..313034117070 100644 --- a/tools/lib/bpf/libbpf.c +++ b/tools/lib/bpf/libbpf.c @@ -44,7 +44,6 @@  #include <sys/vfs.h>  #include <sys/utsname.h>  #include <sys/resource.h> -#include <tools/libc_compat.h>  #include <libelf.h>  #include <gelf.h>  #include <zlib.h> @@ -56,9 +55,6 @@  #include "libbpf_internal.h"  #include "hashmap.h" -/* make sure libbpf doesn't use kernel-only integer typedefs */ -#pragma GCC poison u8 u16 u32 u64 s8 s16 s32 s64 -  #ifndef EM_BPF  #define EM_BPF 247  #endif @@ -67,6 +63,8 @@  #define BPF_FS_MAGIC		0xcafe4a11  #endif +#define BPF_INSN_SZ (sizeof(struct bpf_insn)) +  /* vsprintf() in __base_pr() uses nonliteral format string. It may break   * compilation if user enables corresponding warning. Disable it explicitly.   */ @@ -75,8 +73,6 @@  #define __printf(a, b)	__attribute__((format(printf, a, b)))  static struct bpf_map *bpf_object__add_map(struct bpf_object *obj); -static struct bpf_program *bpf_object__find_prog_by_idx(struct bpf_object *obj, -							int idx);  static const struct btf_type *  skip_mods_and_typedefs(const struct btf *btf, __u32 id, __u32 *res_id); @@ -154,34 +150,37 @@ static void pr_perm_msg(int err)  	___err; })  #endif -#ifdef HAVE_LIBELF_MMAP_SUPPORT -# define LIBBPF_ELF_C_READ_MMAP ELF_C_READ_MMAP -#else -# define LIBBPF_ELF_C_READ_MMAP ELF_C_READ -#endif -  static inline __u64 ptr_to_u64(const void *ptr)  {  	return (__u64) (unsigned long) ptr;  } -struct bpf_capabilities { +enum kern_feature_id {  	/* v4.14: kernel support for program & map names. */ -	__u32 name:1; +	FEAT_PROG_NAME,  	/* v5.2: kernel support for global data sections. 
*/
-	__u32 global_data:1;
+	FEAT_GLOBAL_DATA,
+	/* BTF support */
+	FEAT_BTF,
 	/* BTF_KIND_FUNC and BTF_KIND_FUNC_PROTO support */
-	__u32 btf_func:1;
+	FEAT_BTF_FUNC,
 	/* BTF_KIND_VAR and BTF_KIND_DATASEC support */
-	__u32 btf_datasec:1;
-	/* BPF_F_MMAPABLE is supported for arrays */
-	__u32 array_mmap:1;
+	FEAT_BTF_DATASEC,
 	/* BTF_FUNC_GLOBAL is supported */
-	__u32 btf_func_global:1;
+	FEAT_BTF_GLOBAL_FUNC,
+	/* BPF_F_MMAPABLE is supported for arrays */
+	FEAT_ARRAY_MMAP,
 	/* kernel support for expected_attach_type in BPF_PROG_LOAD */
-	__u32 exp_attach_type:1;
+	FEAT_EXP_ATTACH_TYPE,
+	/* bpf_probe_read_{kernel,user}[_str] helpers */
+	FEAT_PROBE_READ_KERN,
+	/* BPF_PROG_BIND_MAP is supported */
+	FEAT_PROG_BIND_MAP,
+	__FEAT_CNT,
 };
 
+static bool kernel_supports(enum kern_feature_id feat_id);
+
 enum reloc_type {
 	RELO_LD64,
 	RELO_CALL,
@@ -194,6 +193,7 @@ struct reloc_desc {
 	int insn_idx;
 	int map_idx;
 	int sym_off;
+	bool processed;
 };
 
 struct bpf_sec_def;
@@ -209,6 +209,7 @@ struct bpf_sec_def {
 	bool is_exp_attach_type_optional;
 	bool is_attachable;
 	bool is_attach_btf;
+	bool is_sleepable;
 	attach_fn_t attach_fn;
};
 
@@ -217,20 +218,45 @@ struct bpf_sec_def {
  * linux/filter.h.
  */
 struct bpf_program {
-	/* Index in elf obj file, for relocation use. */
-	int idx;
-	char *name;
-	int prog_ifindex;
-	char *section_name;
 	const struct bpf_sec_def *sec_def;
-	/* section_name with / replaced by _; makes recursive pinning
+	char *sec_name;
+	size_t sec_idx;
+	/* this program's instruction offset (in number of instructions)
+	 * within its containing ELF section
+	 */
+	size_t sec_insn_off;
+	/* number of original instructions in ELF section belonging to this
+	 * program, not taking into account subprogram instructions possibly
+	 * appended later during relocation
+	 */
+	size_t sec_insn_cnt;
+	/* Offset (in number of instructions) of the start of instructions
+	 * belonging to this BPF program within its containing main BPF
+	 * program. For the entry-point (main) BPF program, this is always
+	 * zero. For a sub-program, this gets reset before each main BPF
+	 * program is processed and relocated and is used to determine
+	 * whether the sub-program was already appended to the main program,
+	 * and if yes, at which instruction offset.
+	 */
+	size_t sub_insn_off;
+
+	char *name;
+	/* sec_name with / replaced by _; makes recursive pinning
 	 * in bpf_object__pin_programs easier
 	 */
 	char *pin_name;
+
+	/* instructions that belong to BPF program; insns[0] is located at
+	 * sec_insn_off instruction within its ELF section in ELF file, so
+	 * when mapping ELF file instruction index to the local instruction,
+	 * one needs to subtract sec_insn_off; and vice versa.
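+	 * For example (illustrative numbers): a program starting 128 bytes
+	 * into its section has sec_insn_off == 128 / sizeof(struct bpf_insn)
+	 * == 16, so ELF instruction index 20 maps to local index 20 - 16 == 4.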
+	 */
 	struct bpf_insn *insns;
-	size_t insns_cnt, main_prog_cnt;
-	enum bpf_prog_type type;
-	bool load;
+	/* actual number of instructions in this BPF program's image; for
+	 * entry-point BPF programs this includes the size of the main
+	 * program itself plus all the used sub-programs, appended at the end
+	 */
+	size_t insns_cnt;
 
 	struct reloc_desc *reloc_desc;
 	int nr_reloc;
@@ -246,15 +272,16 @@ struct bpf_program {
 	void *priv;
 	bpf_program_clear_priv_t clear_priv;
 
+	bool load;
+	enum bpf_prog_type type;
 	enum bpf_attach_type expected_attach_type;
+	int prog_ifindex;
 	__u32 attach_btf_id;
 	__u32 attach_prog_fd;
 	void *func_info;
 	__u32 func_info_rec_size;
 	__u32 func_info_cnt;
 
-	struct bpf_capabilities *caps;
-
 	void *line_info;
 	__u32 line_info_rec_size;
 	__u32 line_info_cnt;
@@ -363,6 +390,12 @@ struct extern_desc {
 		} kcfg;
 		struct {
 			unsigned long long addr;
+
+			/* target btf_id of the corresponding kernel var. */
+			int vmlinux_btf_id;
+
+			/* local btf_id of the ksym extern's type. */
+			__u32 type_id;
 		} ksym;
 	};
 };
@@ -384,9 +417,10 @@ struct bpf_object {
 	struct extern_desc *externs;
 	int nr_extern;
 	int kconfig_map_idx;
+	int rodata_map_idx;
 
 	bool loaded;
-	bool has_pseudo_calls;
+	bool has_subcalls;
 
 	/*
 	 * Information when doing elf related work. Only valid if fd
@@ -403,6 +437,7 @@ struct bpf_object {
 		Elf_Data *rodata;
 		Elf_Data *bss;
 		Elf_Data *st_ops_data;
+		size_t shstrndx; /* section index for section name strings */
 		size_t strtabidx;
 		struct {
 			GElf_Shdr shdr;
@@ -436,12 +471,20 @@ struct bpf_object {
 	void *priv;
 	bpf_object_clear_priv_t clear_priv;
 
-	struct bpf_capabilities caps;
-
 	char path[];
 };
 #define obj_elf_valid(o)	((o)->efile.elf)
 
+static const char *elf_sym_str(const struct bpf_object *obj, size_t off);
+static const char *elf_sec_str(const struct bpf_object *obj, size_t off);
+static Elf_Scn *elf_sec_by_idx(const struct bpf_object *obj, size_t idx);
+static Elf_Scn *elf_sec_by_name(const struct bpf_object *obj, const char *name);
+static int elf_sec_hdr(const struct bpf_object *obj, Elf_Scn *scn, GElf_Shdr *hdr);
+static const char *elf_sec_name(const struct bpf_object *obj, Elf_Scn *scn);
+static Elf_Data *elf_sec_data(const struct bpf_object *obj, Elf_Scn *scn);
+static int elf_sym_by_sec_off(const struct bpf_object *obj, size_t sec_idx,
+			      size_t off, __u32 sym_type, GElf_Sym *sym);
+
 void bpf_program__unload(struct bpf_program *prog)
 {
 	int i;
@@ -481,159 +524,160 @@ static void bpf_program__exit(struct bpf_program *prog)
 	bpf_program__unload(prog);
 	zfree(&prog->name);
-	zfree(&prog->section_name);
+	zfree(&prog->sec_name);
 	zfree(&prog->pin_name);
 	zfree(&prog->insns);
 	zfree(&prog->reloc_desc);
 
 	prog->nr_reloc = 0;
 	prog->insns_cnt = 0;
-	prog->idx = -1;
+	prog->sec_idx = -1;
 }
 
 static char *__bpf_program__pin_name(struct bpf_program *prog)
 {
 	char *name, *p;
 
-	name = p = strdup(prog->section_name);
+	name = p = strdup(prog->sec_name);
 	while ((p = strchr(p, '/')))
 		*p = '_';
 
 	return name;
 }
 
+static bool insn_is_subprog_call(const struct bpf_insn *insn)
+{
+	return BPF_CLASS(insn->code) == BPF_JMP &&
+	       BPF_OP(insn->code) == BPF_CALL &&
+	       BPF_SRC(insn->code) == BPF_K &&
+	       insn->src_reg == BPF_PSEUDO_CALL &&
+	       insn->dst_reg == 0 &&
+	       insn->off == 0;
+}
+
 static int
-bpf_program__init(void *data, size_t size, char *section_name, int idx,
-		  struct bpf_program *prog)
+bpf_object__init_prog(struct bpf_object *obj, struct bpf_program
*prog, +		      const char *name, size_t sec_idx, const char *sec_name, +		      size_t sec_off, void *insn_data, size_t insn_data_sz)  { -	const size_t bpf_insn_sz = sizeof(struct bpf_insn); +	int i; -	if (size == 0 || size % bpf_insn_sz) { -		pr_warn("corrupted section '%s', size: %zu\n", -			section_name, size); +	if (insn_data_sz == 0 || insn_data_sz % BPF_INSN_SZ || sec_off % BPF_INSN_SZ) { +		pr_warn("sec '%s': corrupted program '%s', offset %zu, size %zu\n", +			sec_name, name, sec_off, insn_data_sz);  		return -EINVAL;  	}  	memset(prog, 0, sizeof(*prog)); +	prog->obj = obj; + +	prog->sec_idx = sec_idx; +	prog->sec_insn_off = sec_off / BPF_INSN_SZ; +	prog->sec_insn_cnt = insn_data_sz / BPF_INSN_SZ; +	/* insns_cnt can later be increased by appending used subprograms */ +	prog->insns_cnt = prog->sec_insn_cnt; + +	prog->type = BPF_PROG_TYPE_UNSPEC; +	prog->load = true; + +	prog->instances.fds = NULL; +	prog->instances.nr = -1; -	prog->section_name = strdup(section_name); -	if (!prog->section_name) { -		pr_warn("failed to alloc name for prog under section(%d) %s\n", -			idx, section_name); +	prog->sec_name = strdup(sec_name); +	if (!prog->sec_name) +		goto errout; + +	prog->name = strdup(name); +	if (!prog->name)  		goto errout; -	}  	prog->pin_name = __bpf_program__pin_name(prog); -	if (!prog->pin_name) { -		pr_warn("failed to alloc pin name for prog under section(%d) %s\n", -			idx, section_name); +	if (!prog->pin_name)  		goto errout; -	} -	prog->insns = malloc(size); -	if (!prog->insns) { -		pr_warn("failed to alloc insns for prog under section %s\n", -			section_name); +	prog->insns = malloc(insn_data_sz); +	if (!prog->insns)  		goto errout; +	memcpy(prog->insns, insn_data, insn_data_sz); + +	for (i = 0; i < prog->insns_cnt; i++) { +		if (insn_is_subprog_call(&prog->insns[i])) { +			obj->has_subcalls = true; +			break; +		}  	} -	prog->insns_cnt = size / bpf_insn_sz; -	memcpy(prog->insns, data, size); -	prog->idx = idx; -	prog->instances.fds = NULL; -	prog->instances.nr = -1; -	prog->type = BPF_PROG_TYPE_UNSPEC; -	prog->load = true;  	return 0;  errout: +	pr_warn("sec '%s': failed to allocate memory for prog '%s'\n", sec_name, name);  	bpf_program__exit(prog);  	return -ENOMEM;  }  static int -bpf_object__add_program(struct bpf_object *obj, void *data, size_t size, -			char *section_name, int idx) +bpf_object__add_programs(struct bpf_object *obj, Elf_Data *sec_data, +			 const char *sec_name, int sec_idx)  { -	struct bpf_program prog, *progs; +	struct bpf_program *prog, *progs; +	void *data = sec_data->d_buf; +	size_t sec_sz = sec_data->d_size, sec_off, prog_sz;  	int nr_progs, err; +	const char *name; +	GElf_Sym sym; -	err = bpf_program__init(data, size, section_name, idx, &prog); -	if (err) -		return err; - -	prog.caps = &obj->caps;  	progs = obj->programs;  	nr_progs = obj->nr_programs; +	sec_off = 0; -	progs = reallocarray(progs, nr_progs + 1, sizeof(progs[0])); -	if (!progs) { -		/* -		 * In this case the original obj->programs -		 * is still valid, so don't need special treat for -		 * bpf_close_object(). 
-		 */ -		pr_warn("failed to alloc a new program under section '%s'\n", -			section_name); -		bpf_program__exit(&prog); -		return -ENOMEM; -	} - -	pr_debug("found program %s\n", prog.section_name); -	obj->programs = progs; -	obj->nr_programs = nr_progs + 1; -	prog.obj = obj; -	progs[nr_progs] = prog; -	return 0; -} - -static int -bpf_object__init_prog_names(struct bpf_object *obj) -{ -	Elf_Data *symbols = obj->efile.symbols; -	struct bpf_program *prog; -	size_t pi, si; +	while (sec_off < sec_sz) { +		if (elf_sym_by_sec_off(obj, sec_idx, sec_off, STT_FUNC, &sym)) { +			pr_warn("sec '%s': failed to find program symbol at offset %zu\n", +				sec_name, sec_off); +			return -LIBBPF_ERRNO__FORMAT; +		} -	for (pi = 0; pi < obj->nr_programs; pi++) { -		const char *name = NULL; +		prog_sz = sym.st_size; -		prog = &obj->programs[pi]; +		name = elf_sym_str(obj, sym.st_name); +		if (!name) { +			pr_warn("sec '%s': failed to get symbol name for offset %zu\n", +				sec_name, sec_off); +			return -LIBBPF_ERRNO__FORMAT; +		} -		for (si = 0; si < symbols->d_size / sizeof(GElf_Sym) && !name; -		     si++) { -			GElf_Sym sym; +		if (sec_off + prog_sz > sec_sz) { +			pr_warn("sec '%s': program at offset %zu crosses section boundary\n", +				sec_name, sec_off); +			return -LIBBPF_ERRNO__FORMAT; +		} -			if (!gelf_getsym(symbols, si, &sym)) -				continue; -			if (sym.st_shndx != prog->idx) -				continue; -			if (GELF_ST_BIND(sym.st_info) != STB_GLOBAL) -				continue; +		pr_debug("sec '%s': found program '%s' at insn offset %zu (%zu bytes), code size %zu insns (%zu bytes)\n", +			 sec_name, name, sec_off / BPF_INSN_SZ, sec_off, prog_sz / BPF_INSN_SZ, prog_sz); -			name = elf_strptr(obj->efile.elf, -					  obj->efile.strtabidx, -					  sym.st_name); -			if (!name) { -				pr_warn("failed to get sym name string for prog %s\n", -					prog->section_name); -				return -LIBBPF_ERRNO__LIBELF; -			} +		progs = libbpf_reallocarray(progs, nr_progs + 1, sizeof(*progs)); +		if (!progs) { +			/* +			 * In this case the original obj->programs +			 * is still valid, so don't need special treat for +			 * bpf_close_object(). 
+			 */ +			pr_warn("sec '%s': failed to alloc memory for new program '%s'\n", +				sec_name, name); +			return -ENOMEM;  		} +		obj->programs = progs; -		if (!name && prog->idx == obj->efile.text_shndx) -			name = ".text"; +		prog = &progs[nr_progs]; -		if (!name) { -			pr_warn("failed to find sym for prog %s\n", -				prog->section_name); -			return -EINVAL; -		} +		err = bpf_object__init_prog(obj, prog, name, sec_idx, sec_name, +					    sec_off, data + sec_off, prog_sz); +		if (err) +			return err; -		prog->name = strdup(name); -		if (!prog->name) { -			pr_warn("failed to allocate memory for prog sym %s\n", -				name); -			return -ENOMEM; -		} +		nr_progs++; +		obj->nr_programs = nr_progs; + +		sec_off += prog_sz;  	}  	return 0; @@ -1035,6 +1079,7 @@ static struct bpf_object *bpf_object__new(const char *path,  	obj->efile.bss_shndx = -1;  	obj->efile.st_ops_shndx = -1;  	obj->kconfig_map_idx = -1; +	obj->rodata_map_idx = -1;  	obj->kern_version = get_kernel_version();  	obj->loaded = false; @@ -1066,13 +1111,18 @@ static void bpf_object__elf_finish(struct bpf_object *obj)  	obj->efile.obj_buf_sz = 0;  } +/* if libelf is old and doesn't support mmap(), fall back to read() */ +#ifndef ELF_C_READ_MMAP +#define ELF_C_READ_MMAP ELF_C_READ +#endif +  static int bpf_object__elf_init(struct bpf_object *obj)  {  	int err = 0;  	GElf_Ehdr *ep;  	if (obj_elf_valid(obj)) { -		pr_warn("elf init: internal error\n"); +		pr_warn("elf: init internal error\n");  		return -LIBBPF_ERRNO__LIBELF;  	} @@ -1090,31 +1140,44 @@ static int bpf_object__elf_init(struct bpf_object *obj)  			err = -errno;  			cp = libbpf_strerror_r(err, errmsg, sizeof(errmsg)); -			pr_warn("failed to open %s: %s\n", obj->path, cp); +			pr_warn("elf: failed to open %s: %s\n", obj->path, cp);  			return err;  		} -		obj->efile.elf = elf_begin(obj->efile.fd, -					   LIBBPF_ELF_C_READ_MMAP, NULL); +		obj->efile.elf = elf_begin(obj->efile.fd, ELF_C_READ_MMAP, NULL);  	}  	if (!obj->efile.elf) { -		pr_warn("failed to open %s as ELF file\n", obj->path); +		pr_warn("elf: failed to open %s as ELF file: %s\n", obj->path, elf_errmsg(-1));  		err = -LIBBPF_ERRNO__LIBELF;  		goto errout;  	}  	if (!gelf_getehdr(obj->efile.elf, &obj->efile.ehdr)) { -		pr_warn("failed to get EHDR from %s\n", obj->path); +		pr_warn("elf: failed to get ELF header from %s: %s\n", obj->path, elf_errmsg(-1));  		err = -LIBBPF_ERRNO__FORMAT;  		goto errout;  	}  	ep = &obj->efile.ehdr; +	if (elf_getshdrstrndx(obj->efile.elf, &obj->efile.shstrndx)) { +		pr_warn("elf: failed to get section names section index for %s: %s\n", +			obj->path, elf_errmsg(-1)); +		err = -LIBBPF_ERRNO__FORMAT; +		goto errout; +	} + +	/* Elf is corrupted/truncated, avoid calling elf_strptr. 
*/ +	if (!elf_rawdata(elf_getscn(obj->efile.elf, obj->efile.shstrndx), NULL)) { +		pr_warn("elf: failed to get section names strings from %s: %s\n", +			obj->path, elf_errmsg(-1)); +		return -LIBBPF_ERRNO__FORMAT; +	} +  	/* Old LLVM set e_machine to EM_NONE */  	if (ep->e_type != ET_REL ||  	    (ep->e_machine && ep->e_machine != EM_BPF)) { -		pr_warn("%s is not an eBPF object file\n", obj->path); +		pr_warn("elf: %s is not a valid eBPF object file\n", obj->path);  		err = -LIBBPF_ERRNO__FORMAT;  		goto errout;  	} @@ -1136,7 +1199,7 @@ static int bpf_object__check_endianness(struct bpf_object *obj)  #else  # error "Unrecognized __BYTE_ORDER__"  #endif -	pr_warn("endianness mismatch.\n"); +	pr_warn("elf: endianness mismatch in %s.\n", obj->path);  	return -LIBBPF_ERRNO__ENDIAN;  } @@ -1171,55 +1234,10 @@ static bool bpf_map_type__is_map_in_map(enum bpf_map_type type)  	return false;  } -static int bpf_object_search_section_size(const struct bpf_object *obj, -					  const char *name, size_t *d_size) -{ -	const GElf_Ehdr *ep = &obj->efile.ehdr; -	Elf *elf = obj->efile.elf; -	Elf_Scn *scn = NULL; -	int idx = 0; - -	while ((scn = elf_nextscn(elf, scn)) != NULL) { -		const char *sec_name; -		Elf_Data *data; -		GElf_Shdr sh; - -		idx++; -		if (gelf_getshdr(scn, &sh) != &sh) { -			pr_warn("failed to get section(%d) header from %s\n", -				idx, obj->path); -			return -EIO; -		} - -		sec_name = elf_strptr(elf, ep->e_shstrndx, sh.sh_name); -		if (!sec_name) { -			pr_warn("failed to get section(%d) name from %s\n", -				idx, obj->path); -			return -EIO; -		} - -		if (strcmp(name, sec_name)) -			continue; - -		data = elf_getdata(scn, 0); -		if (!data) { -			pr_warn("failed to get section(%d) data from %s(%s)\n", -				idx, name, obj->path); -			return -EIO; -		} - -		*d_size = data->d_size; -		return 0; -	} - -	return -ENOENT; -} -  int bpf_object__section_size(const struct bpf_object *obj, const char *name,  			     __u32 *size)  {  	int ret = -ENOENT; -	size_t d_size;  	*size = 0;  	if (!name) { @@ -1237,9 +1255,13 @@ int bpf_object__section_size(const struct bpf_object *obj, const char *name,  		if (obj->efile.st_ops_data)  			*size = obj->efile.st_ops_data->d_size;  	} else { -		ret = bpf_object_search_section_size(obj, name, &d_size); -		if (!ret) -			*size = d_size; +		Elf_Scn *scn = elf_sec_by_name(obj, name); +		Elf_Data *data = elf_sec_data(obj, scn); + +		if (data) { +			ret = 0; /* found it */ +			*size = data->d_size; +		}  	}  	return *size ? 
0 : ret; @@ -1264,8 +1286,7 @@ int bpf_object__variable_offset(const struct bpf_object *obj, const char *name,  		    GELF_ST_TYPE(sym.st_info) != STT_OBJECT)  			continue; -		sname = elf_strptr(obj->efile.elf, obj->efile.strtabidx, -				   sym.st_name); +		sname = elf_sym_str(obj, sym.st_name);  		if (!sname) {  			pr_warn("failed to get sym name string for var %s\n",  				name); @@ -1290,7 +1311,7 @@ static struct bpf_map *bpf_object__add_map(struct bpf_object *obj)  		return &obj->maps[obj->nr_maps++];  	new_cap = max((size_t)4, obj->maps_cap * 3 / 2); -	new_maps = realloc(obj->maps, new_cap * sizeof(*obj->maps)); +	new_maps = libbpf_reallocarray(obj->maps, new_cap, sizeof(*obj->maps));  	if (!new_maps) {  		pr_warn("alloc maps for object failed\n");  		return ERR_PTR(-ENOMEM); @@ -1417,6 +1438,8 @@ static int bpf_object__init_global_data_maps(struct bpf_object *obj)  						    obj->efile.rodata->d_size);  		if (err)  			return err; + +		obj->rodata_map_idx = obj->nr_maps - 1;  	}  	if (obj->efile.bss_shndx >= 0) {  		err = bpf_object__init_internal_map(obj, LIBBPF_MAP_BSS, @@ -1742,12 +1765,12 @@ static int bpf_object__init_user_maps(struct bpf_object *obj, bool strict)  	if (!symbols)  		return -EINVAL; -	scn = elf_getscn(obj->efile.elf, obj->efile.maps_shndx); -	if (scn) -		data = elf_getdata(scn, NULL); + +	scn = elf_sec_by_idx(obj, obj->efile.maps_shndx); +	data = elf_sec_data(obj, scn);  	if (!scn || !data) { -		pr_warn("failed to get Elf_Data from map section %d\n", -			obj->efile.maps_shndx); +		pr_warn("elf: failed to get legacy map definitions for %s\n", +			obj->path);  		return -EINVAL;  	} @@ -1769,12 +1792,12 @@ static int bpf_object__init_user_maps(struct bpf_object *obj, bool strict)  		nr_maps++;  	}  	/* Assume equally sized map definitions */ -	pr_debug("maps in %s: %d maps in %zd bytes\n", -		 obj->path, nr_maps, data->d_size); +	pr_debug("elf: found %d legacy map definitions (%zd bytes) in %s\n", +		 nr_maps, data->d_size, obj->path);  	if (!data->d_size || nr_maps == 0 || (data->d_size % nr_maps) != 0) { -		pr_warn("unable to determine map definition size section %s, %d maps in %zd bytes\n", -			obj->path, nr_maps, data->d_size); +		pr_warn("elf: unable to determine legacy map definition size in %s\n", +			obj->path);  		return -EINVAL;  	}  	map_def_sz = data->d_size / nr_maps; @@ -1795,8 +1818,7 @@ static int bpf_object__init_user_maps(struct bpf_object *obj, bool strict)  		if (IS_ERR(map))  			return PTR_ERR(map); -		map_name = elf_strptr(obj->efile.elf, obj->efile.strtabidx, -				      sym.st_name); +		map_name = elf_sym_str(obj, sym.st_name);  		if (!map_name) {  			pr_warn("failed to get map #%d name sym string for obj %s\n",  				i, obj->path); @@ -1884,6 +1906,29 @@ resolve_func_ptr(const struct btf *btf, __u32 id, __u32 *res_id)  	return btf_is_func_proto(t) ? 
t : NULL;  } +static const char *btf_kind_str(const struct btf_type *t) +{ +	switch (btf_kind(t)) { +	case BTF_KIND_UNKN: return "void"; +	case BTF_KIND_INT: return "int"; +	case BTF_KIND_PTR: return "ptr"; +	case BTF_KIND_ARRAY: return "array"; +	case BTF_KIND_STRUCT: return "struct"; +	case BTF_KIND_UNION: return "union"; +	case BTF_KIND_ENUM: return "enum"; +	case BTF_KIND_FWD: return "fwd"; +	case BTF_KIND_TYPEDEF: return "typedef"; +	case BTF_KIND_VOLATILE: return "volatile"; +	case BTF_KIND_CONST: return "const"; +	case BTF_KIND_RESTRICT: return "restrict"; +	case BTF_KIND_FUNC: return "func"; +	case BTF_KIND_FUNC_PROTO: return "func_proto"; +	case BTF_KIND_VAR: return "var"; +	case BTF_KIND_DATASEC: return "datasec"; +	default: return "unknown"; +	} +} +  /*   * Fetch integer attribute of BTF map definition. Such attributes are   * represented using a pointer to an array, in which dimensionality of array @@ -1900,8 +1945,8 @@ static bool get_map_field_int(const char *map_name, const struct btf *btf,  	const struct btf_type *arr_t;  	if (!btf_is_ptr(t)) { -		pr_warn("map '%s': attr '%s': expected PTR, got %u.\n", -			map_name, name, btf_kind(t)); +		pr_warn("map '%s': attr '%s': expected PTR, got %s.\n", +			map_name, name, btf_kind_str(t));  		return false;  	} @@ -1912,8 +1957,8 @@ static bool get_map_field_int(const char *map_name, const struct btf *btf,  		return false;  	}  	if (!btf_is_array(arr_t)) { -		pr_warn("map '%s': attr '%s': expected ARRAY, got %u.\n", -			map_name, name, btf_kind(arr_t)); +		pr_warn("map '%s': attr '%s': expected ARRAY, got %s.\n", +			map_name, name, btf_kind_str(arr_t));  		return false;  	}  	arr_info = btf_array(arr_t); @@ -1924,7 +1969,7 @@ static bool get_map_field_int(const char *map_name, const struct btf *btf,  static int build_map_pin_path(struct bpf_map *map, const char *path)  {  	char buf[PATH_MAX]; -	int err, len; +	int len;  	if (!path)  		path = "/sys/fs/bpf"; @@ -1935,11 +1980,7 @@ static int build_map_pin_path(struct bpf_map *map, const char *path)  	else if (len >= PATH_MAX)  		return -ENAMETOOLONG; -	err = bpf_map__set_pin_path(map, buf); -	if (err) -		return err; - -	return 0; +	return bpf_map__set_pin_path(map, buf);  } @@ -2007,8 +2048,8 @@ static int parse_btf_map_def(struct bpf_object *obj,  				return -EINVAL;  			}  			if (!btf_is_ptr(t)) { -				pr_warn("map '%s': key spec is not PTR: %u.\n", -					map->name, btf_kind(t)); +				pr_warn("map '%s': key spec is not PTR: %s.\n", +					map->name, btf_kind_str(t));  				return -EINVAL;  			}  			sz = btf__resolve_size(obj->btf, t->type); @@ -2049,8 +2090,8 @@ static int parse_btf_map_def(struct bpf_object *obj,  				return -EINVAL;  			}  			if (!btf_is_ptr(t)) { -				pr_warn("map '%s': value spec is not PTR: %u.\n", -					map->name, btf_kind(t)); +				pr_warn("map '%s': value spec is not PTR: %s.\n", +					map->name, btf_kind_str(t));  				return -EINVAL;  			}  			sz = btf__resolve_size(obj->btf, t->type); @@ -2107,14 +2148,14 @@ static int parse_btf_map_def(struct bpf_object *obj,  			t = skip_mods_and_typedefs(obj->btf, btf_array(t)->type,  						   NULL);  			if (!btf_is_ptr(t)) { -				pr_warn("map '%s': map-in-map inner def is of unexpected kind %u.\n", -					map->name, btf_kind(t)); +				pr_warn("map '%s': map-in-map inner def is of unexpected kind %s.\n", +					map->name, btf_kind_str(t));  				return -EINVAL;  			}  			t = skip_mods_and_typedefs(obj->btf, t->type, NULL);  			if (!btf_is_struct(t)) { -				pr_warn("map '%s': map-in-map inner def is of unexpected kind 
%u.\n", -					map->name, btf_kind(t)); +				pr_warn("map '%s': map-in-map inner def is of unexpected kind %s.\n", +					map->name, btf_kind_str(t));  				return -EINVAL;  			} @@ -2205,8 +2246,8 @@ static int bpf_object__init_user_btf_map(struct bpf_object *obj,  		return -EINVAL;  	}  	if (!btf_is_var(var)) { -		pr_warn("map '%s': unexpected var kind %u.\n", -			map_name, btf_kind(var)); +		pr_warn("map '%s': unexpected var kind %s.\n", +			map_name, btf_kind_str(var));  		return -EINVAL;  	}  	if (var_extra->linkage != BTF_VAR_GLOBAL_ALLOCATED && @@ -2218,8 +2259,8 @@ static int bpf_object__init_user_btf_map(struct bpf_object *obj,  	def = skip_mods_and_typedefs(obj->btf, var->type, NULL);  	if (!btf_is_struct(def)) { -		pr_warn("map '%s': unexpected def kind %u.\n", -			map_name, btf_kind(var)); +		pr_warn("map '%s': unexpected def kind %s.\n", +			map_name, btf_kind_str(var));  		return -EINVAL;  	}  	if (def->size > vi->size) { @@ -2259,12 +2300,11 @@ static int bpf_object__init_user_btf_maps(struct bpf_object *obj, bool strict,  	if (obj->efile.btf_maps_shndx < 0)  		return 0; -	scn = elf_getscn(obj->efile.elf, obj->efile.btf_maps_shndx); -	if (scn) -		data = elf_getdata(scn, NULL); +	scn = elf_sec_by_idx(obj, obj->efile.btf_maps_shndx); +	data = elf_sec_data(obj, scn);  	if (!scn || !data) { -		pr_warn("failed to get Elf_Data from map section %d (%s)\n", -			obj->efile.btf_maps_shndx, MAPS_ELF_SEC); +		pr_warn("elf: failed to get %s map definitions for %s\n", +			MAPS_ELF_SEC, obj->path);  		return -EINVAL;  	} @@ -2322,36 +2362,28 @@ static int bpf_object__init_maps(struct bpf_object *obj,  static bool section_have_execinstr(struct bpf_object *obj, int idx)  { -	Elf_Scn *scn;  	GElf_Shdr sh; -	scn = elf_getscn(obj->efile.elf, idx); -	if (!scn) -		return false; - -	if (gelf_getshdr(scn, &sh) != &sh) +	if (elf_sec_hdr(obj, elf_sec_by_idx(obj, idx), &sh))  		return false; -	if (sh.sh_flags & SHF_EXECINSTR) -		return true; - -	return false; +	return sh.sh_flags & SHF_EXECINSTR;  }  static bool btf_needs_sanitization(struct bpf_object *obj)  { -	bool has_func_global = obj->caps.btf_func_global; -	bool has_datasec = obj->caps.btf_datasec; -	bool has_func = obj->caps.btf_func; +	bool has_func_global = kernel_supports(FEAT_BTF_GLOBAL_FUNC); +	bool has_datasec = kernel_supports(FEAT_BTF_DATASEC); +	bool has_func = kernel_supports(FEAT_BTF_FUNC);  	return !has_func || !has_datasec || !has_func_global;  }  static void bpf_object__sanitize_btf(struct bpf_object *obj, struct btf *btf)  { -	bool has_func_global = obj->caps.btf_func_global; -	bool has_datasec = obj->caps.btf_datasec; -	bool has_func = obj->caps.btf_func; +	bool has_func_global = kernel_supports(FEAT_BTF_GLOBAL_FUNC); +	bool has_datasec = kernel_supports(FEAT_BTF_DATASEC); +	bool has_func = kernel_supports(FEAT_BTF_FUNC);  	struct btf_type *t;  	int i, j, vlen; @@ -2496,12 +2528,23 @@ static int bpf_object__load_vmlinux_btf(struct bpf_object *obj)  {  	bool need_vmlinux_btf = false;  	struct bpf_program *prog; -	int err; +	int i, err;  	/* CO-RE relocations need kernel BTF */ -	if (obj->btf_ext && obj->btf_ext->field_reloc_info.len) +	if (obj->btf_ext && obj->btf_ext->core_relo_info.len)  		need_vmlinux_btf = true; +	/* Support for typed ksyms needs kernel BTF */ +	for (i = 0; i < obj->nr_extern; i++) { +		const struct extern_desc *ext; + +		ext = &obj->externs[i]; +		if (ext->type == EXT_KSYM && ext->ksym.type_id) { +			need_vmlinux_btf = true; +			break; +		} +	} +  	bpf_object__for_each_program(prog, obj) {  		if (!prog->load)  
			continue;
@@ -2533,6 +2576,15 @@ static int bpf_object__sanitize_and_load_btf(struct bpf_object *obj)
 	if (!obj->btf)
 		return 0;
 
+	if (!kernel_supports(FEAT_BTF)) {
+		if (kernel_needs_btf(obj)) {
+			err = -EOPNOTSUPP;
+			goto report;
+		}
+		pr_debug("Kernel doesn't support BTF, skipping its upload.\n");
+		return 0;
+	}
+
 	sanitize = btf_needs_sanitization(obj);
 	if (sanitize) {
 		const void *raw_data;
@@ -2558,6 +2610,7 @@ static int bpf_object__sanitize_and_load_btf(struct bpf_object *obj)
 		}
 		btf__free(kern_btf);
 	}
+report:
 	if (err) {
 		btf_mandatory = kernel_needs_btf(obj);
 		pr_warn("Error loading .BTF into kernel: %d. %s\n", err,
@@ -2569,61 +2622,255 @@ static int bpf_object__sanitize_and_load_btf(struct bpf_object *obj)
 	return err;
 }
 
+static const char *elf_sym_str(const struct bpf_object *obj, size_t off)
+{
+	const char *name;
+
+	name = elf_strptr(obj->efile.elf, obj->efile.strtabidx, off);
+	if (!name) {
+		pr_warn("elf: failed to get symbol name string at offset %zu from %s: %s\n",
+			off, obj->path, elf_errmsg(-1));
+		return NULL;
+	}
+
+	return name;
+}
+
+static const char *elf_sec_str(const struct bpf_object *obj, size_t off)
+{
+	const char *name;
+
+	name = elf_strptr(obj->efile.elf, obj->efile.shstrndx, off);
+	if (!name) {
+		pr_warn("elf: failed to get section name string at offset %zu from %s: %s\n",
+			off, obj->path, elf_errmsg(-1));
+		return NULL;
+	}
+
+	return name;
+}
+
+static Elf_Scn *elf_sec_by_idx(const struct bpf_object *obj, size_t idx)
+{
+	Elf_Scn *scn;
+
+	scn = elf_getscn(obj->efile.elf, idx);
+	if (!scn) {
+		pr_warn("elf: failed to get section(%zu) from %s: %s\n",
+			idx, obj->path, elf_errmsg(-1));
+		return NULL;
+	}
+	return scn;
+}
+
+static Elf_Scn *elf_sec_by_name(const struct bpf_object *obj, const char *name)
+{
+	Elf_Scn *scn = NULL;
+	Elf *elf = obj->efile.elf;
+	const char *sec_name;
+
+	while ((scn = elf_nextscn(elf, scn)) != NULL) {
+		sec_name = elf_sec_name(obj, scn);
+		if (!sec_name)
+			return NULL;
+
+		if (strcmp(sec_name, name) != 0)
+			continue;
+
+		return scn;
+	}
+	return NULL;
+}
+
+static int elf_sec_hdr(const struct bpf_object *obj, Elf_Scn *scn, GElf_Shdr *hdr)
+{
+	if (!scn)
+		return -EINVAL;
+
+	if (gelf_getshdr(scn, hdr) != hdr) {
+		pr_warn("elf: failed to get section(%zu) header from %s: %s\n",
+			elf_ndxscn(scn), obj->path, elf_errmsg(-1));
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
+static const char *elf_sec_name(const struct bpf_object *obj, Elf_Scn *scn)
+{
+	const char *name;
+	GElf_Shdr sh;
+
+	if (!scn)
+		return NULL;
+
+	if (elf_sec_hdr(obj, scn, &sh))
+		return NULL;
+
+	name = elf_sec_str(obj, sh.sh_name);
+	if (!name) {
+		pr_warn("elf: failed to get section(%zu) name from %s: %s\n",
+			elf_ndxscn(scn), obj->path, elf_errmsg(-1));
+		return NULL;
+	}
+
+	return name;
+}
+
+static Elf_Data *elf_sec_data(const struct bpf_object *obj, Elf_Scn *scn)
+{
+	Elf_Data *data;
+
+	if (!scn)
+		return NULL;
+
+	data = elf_getdata(scn, 0);
+	if (!data) {
+		pr_warn("elf: failed to get section(%zu) %s data from %s: %s\n",
+			elf_ndxscn(scn), elf_sec_name(obj, scn) ?: "<?>",
+			obj->path, elf_errmsg(-1));
+		return NULL;
+	}
+
+	return data;
+}
+
+static int elf_sym_by_sec_off(const struct bpf_object *obj, size_t sec_idx,
+			      size_t off, __u32 sym_type, GElf_Sym *sym)
+{
+	Elf_Data *symbols = obj->efile.symbols;
+	size_t n = symbols->d_size / sizeof(GElf_Sym);
+	int i;
+
+	for (i = 0; i < n; i++) {
+		if (!gelf_getsym(symbols, i, sym))
+			continue;
+		if (sym->st_shndx != 
sec_idx || sym->st_value != off) +			continue; +		if (GELF_ST_TYPE(sym->st_info) != sym_type) +			continue; +		return 0; +	} + +	return -ENOENT; +} + +static bool is_sec_name_dwarf(const char *name) +{ +	/* approximation, but the actual list is too long */ +	return strncmp(name, ".debug_", sizeof(".debug_") - 1) == 0; +} + +static bool ignore_elf_section(GElf_Shdr *hdr, const char *name) +{ +	/* no special handling of .strtab */ +	if (hdr->sh_type == SHT_STRTAB) +		return true; + +	/* ignore .llvm_addrsig section as well */ +	if (hdr->sh_type == 0x6FFF4C03 /* SHT_LLVM_ADDRSIG */) +		return true; + +	/* no subprograms will lead to an empty .text section, ignore it */ +	if (hdr->sh_type == SHT_PROGBITS && hdr->sh_size == 0 && +	    strcmp(name, ".text") == 0) +		return true; + +	/* DWARF sections */ +	if (is_sec_name_dwarf(name)) +		return true; + +	if (strncmp(name, ".rel", sizeof(".rel") - 1) == 0) { +		name += sizeof(".rel") - 1; +		/* DWARF section relocations */ +		if (is_sec_name_dwarf(name)) +			return true; + +		/* .BTF and .BTF.ext don't need relocations */ +		if (strcmp(name, BTF_ELF_SEC) == 0 || +		    strcmp(name, BTF_EXT_ELF_SEC) == 0) +			return true; +	} + +	return false; +} + +static int cmp_progs(const void *_a, const void *_b) +{ +	const struct bpf_program *a = _a; +	const struct bpf_program *b = _b; + +	if (a->sec_idx != b->sec_idx) +		return a->sec_idx < b->sec_idx ? -1 : 1; + +	/* sec_insn_off can't be the same within the section */ +	return a->sec_insn_off < b->sec_insn_off ? -1 : 1; +} +  static int bpf_object__elf_collect(struct bpf_object *obj)  {  	Elf *elf = obj->efile.elf; -	GElf_Ehdr *ep = &obj->efile.ehdr;  	Elf_Data *btf_ext_data = NULL;  	Elf_Data *btf_data = NULL; -	Elf_Scn *scn = NULL;  	int idx = 0, err = 0; +	const char *name; +	Elf_Data *data; +	Elf_Scn *scn; +	GElf_Shdr sh; -	/* Elf is corrupted/truncated, avoid calling elf_strptr. 
*/ -	if (!elf_rawdata(elf_getscn(elf, ep->e_shstrndx), NULL)) { -		pr_warn("failed to get e_shstrndx from %s\n", obj->path); -		return -LIBBPF_ERRNO__FORMAT; +	/* a bunch of ELF parsing functionality depends on processing symbols, +	 * so do the first pass and find the symbol table +	 */ +	scn = NULL; +	while ((scn = elf_nextscn(elf, scn)) != NULL) { +		if (elf_sec_hdr(obj, scn, &sh)) +			return -LIBBPF_ERRNO__FORMAT; + +		if (sh.sh_type == SHT_SYMTAB) { +			if (obj->efile.symbols) { +				pr_warn("elf: multiple symbol tables in %s\n", obj->path); +				return -LIBBPF_ERRNO__FORMAT; +			} + +			data = elf_sec_data(obj, scn); +			if (!data) +				return -LIBBPF_ERRNO__FORMAT; + +			obj->efile.symbols = data; +			obj->efile.symbols_shndx = elf_ndxscn(scn); +			obj->efile.strtabidx = sh.sh_link; +		}  	} +	scn = NULL;  	while ((scn = elf_nextscn(elf, scn)) != NULL) { -		char *name; -		GElf_Shdr sh; -		Elf_Data *data; -  		idx++; -		if (gelf_getshdr(scn, &sh) != &sh) { -			pr_warn("failed to get section(%d) header from %s\n", -				idx, obj->path); + +		if (elf_sec_hdr(obj, scn, &sh))  			return -LIBBPF_ERRNO__FORMAT; -		} -		name = elf_strptr(elf, ep->e_shstrndx, sh.sh_name); -		if (!name) { -			pr_warn("failed to get section(%d) name from %s\n", -				idx, obj->path); +		name = elf_sec_str(obj, sh.sh_name); +		if (!name)  			return -LIBBPF_ERRNO__FORMAT; -		} -		data = elf_getdata(scn, 0); -		if (!data) { -			pr_warn("failed to get section(%d) data from %s(%s)\n", -				idx, name, obj->path); +		if (ignore_elf_section(&sh, name)) +			continue; + +		data = elf_sec_data(obj, scn); +		if (!data)  			return -LIBBPF_ERRNO__FORMAT; -		} -		pr_debug("section(%d) %s, size %ld, link %d, flags %lx, type=%d\n", + +		pr_debug("elf: section(%d) %s, size %ld, link %d, flags %lx, type=%d\n",  			 idx, name, (unsigned long)data->d_size,  			 (int)sh.sh_link, (unsigned long)sh.sh_flags,  			 (int)sh.sh_type);  		if (strcmp(name, "license") == 0) { -			err = bpf_object__init_license(obj, -						       data->d_buf, -						       data->d_size); +			err = bpf_object__init_license(obj, data->d_buf, data->d_size);  			if (err)  				return err;  		} else if (strcmp(name, "version") == 0) { -			err = bpf_object__init_kversion(obj, -							data->d_buf, -							data->d_size); +			err = bpf_object__init_kversion(obj, data->d_buf, data->d_size);  			if (err)  				return err;  		} else if (strcmp(name, "maps") == 0) { @@ -2635,31 +2882,14 @@ static int bpf_object__elf_collect(struct bpf_object *obj)  		} else if (strcmp(name, BTF_EXT_ELF_SEC) == 0) {  			btf_ext_data = data;  		} else if (sh.sh_type == SHT_SYMTAB) { -			if (obj->efile.symbols) { -				pr_warn("bpf: multiple SYMTAB in %s\n", -					obj->path); -				return -LIBBPF_ERRNO__FORMAT; -			} -			obj->efile.symbols = data; -			obj->efile.symbols_shndx = idx; -			obj->efile.strtabidx = sh.sh_link; +			/* already processed during the first pass above */  		} else if (sh.sh_type == SHT_PROGBITS && data->d_size > 0) {  			if (sh.sh_flags & SHF_EXECINSTR) {  				if (strcmp(name, ".text") == 0)  					obj->efile.text_shndx = idx; -				err = bpf_object__add_program(obj, data->d_buf, -							      data->d_size, -							      name, idx); -				if (err) { -					char errmsg[STRERR_BUFSIZE]; -					char *cp; - -					cp = libbpf_strerror_r(-err, errmsg, -							       sizeof(errmsg)); -					pr_warn("failed to alloc program %s (%s): %s", -						name, obj->path, cp); +				err = bpf_object__add_programs(obj, data, name, idx); +				if (err)  					return err; -				}  			} else if 
(strcmp(name, DATA_SEC) == 0) {  				obj->efile.data = data;  				obj->efile.data_shndx = idx; @@ -2670,7 +2900,8 @@ static int bpf_object__elf_collect(struct bpf_object *obj)  				obj->efile.st_ops_data = data;  				obj->efile.st_ops_shndx = idx;  			} else { -				pr_debug("skip section(%d) %s\n", idx, name); +				pr_info("elf: skipping unrecognized data section(%d) %s\n", +					idx, name);  			}  		} else if (sh.sh_type == SHT_REL) {  			int nr_sects = obj->efile.nr_reloc_sects; @@ -2681,36 +2912,40 @@ static int bpf_object__elf_collect(struct bpf_object *obj)  			if (!section_have_execinstr(obj, sec) &&  			    strcmp(name, ".rel" STRUCT_OPS_SEC) &&  			    strcmp(name, ".rel" MAPS_ELF_SEC)) { -				pr_debug("skip relo %s(%d) for section(%d)\n", -					 name, idx, sec); +				pr_info("elf: skipping relo section(%d) %s for section(%d) %s\n", +					idx, name, sec, +					elf_sec_name(obj, elf_sec_by_idx(obj, sec)) ?: "<?>");  				continue;  			} -			sects = reallocarray(sects, nr_sects + 1, -					     sizeof(*obj->efile.reloc_sects)); -			if (!sects) { -				pr_warn("reloc_sects realloc failed\n"); +			sects = libbpf_reallocarray(sects, nr_sects + 1, +						    sizeof(*obj->efile.reloc_sects)); +			if (!sects)  				return -ENOMEM; -			}  			obj->efile.reloc_sects = sects;  			obj->efile.nr_reloc_sects++;  			obj->efile.reloc_sects[nr_sects].shdr = sh;  			obj->efile.reloc_sects[nr_sects].data = data; -		} else if (sh.sh_type == SHT_NOBITS && -			   strcmp(name, BSS_SEC) == 0) { +		} else if (sh.sh_type == SHT_NOBITS && strcmp(name, BSS_SEC) == 0) {  			obj->efile.bss = data;  			obj->efile.bss_shndx = idx;  		} else { -			pr_debug("skip section(%d) %s\n", idx, name); +			pr_info("elf: skipping section(%d) %s (size %zu)\n", idx, name, +				(size_t)sh.sh_size);  		}  	}  	if (!obj->efile.strtabidx || obj->efile.strtabidx > idx) { -		pr_warn("Corrupted ELF file: index of strtab invalid\n"); +		pr_warn("elf: symbol strings section missing or invalid in %s\n", obj->path);  		return -LIBBPF_ERRNO__FORMAT;  	} + +	/* sort BPF programs by section name and in-section instruction offset +	 * for faster search */ +	qsort(obj->programs, obj->nr_programs, sizeof(*obj->programs), cmp_progs); +  	return bpf_object__init_btf(obj, btf_data, btf_ext_data);  } @@ -2869,14 +3104,13 @@ static int bpf_object__collect_externs(struct bpf_object *obj)  	if (!obj->efile.symbols)  		return 0; -	scn = elf_getscn(obj->efile.elf, obj->efile.symbols_shndx); -	if (!scn) +	scn = elf_sec_by_idx(obj, obj->efile.symbols_shndx); +	if (elf_sec_hdr(obj, scn, &sh))  		return -LIBBPF_ERRNO__FORMAT; -	if (gelf_getshdr(scn, &sh) != &sh) -		return -LIBBPF_ERRNO__FORMAT; -	n = sh.sh_size / sh.sh_entsize; +	n = sh.sh_size / sh.sh_entsize;  	pr_debug("looking for externs among %d symbols...\n", n); +  	for (i = 0; i < n; i++) {  		GElf_Sym sym; @@ -2884,13 +3118,12 @@ static int bpf_object__collect_externs(struct bpf_object *obj)  			return -LIBBPF_ERRNO__FORMAT;  		if (!sym_is_extern(&sym))  			continue; -		ext_name = elf_strptr(obj->efile.elf, obj->efile.strtabidx, -				      sym.st_name); +		ext_name = elf_sym_str(obj, sym.st_name);  		if (!ext_name || !ext_name[0])  			continue;  		ext = obj->externs; -		ext = reallocarray(ext, obj->nr_extern + 1, sizeof(*ext)); +		ext = libbpf_reallocarray(ext, obj->nr_extern + 1, sizeof(*ext));  		if (!ext)  			return -ENOMEM;  		obj->externs = ext; @@ -2940,16 +3173,10 @@ static int bpf_object__collect_externs(struct bpf_object *obj)  				return -ENOTSUP;  			}  		} else if (strcmp(sec_name, 
KSYMS_SEC) == 0) { -			const struct btf_type *vt; -  			ksym_sec = sec;  			ext->type = EXT_KSYM; - -			vt = skip_mods_and_typedefs(obj->btf, t->type, NULL); -			if (!btf_is_void(vt)) { -				pr_warn("extern (ksym) '%s' is not typeless (void)\n", ext_name); -				return -ENOTSUP; -			} +			skip_mods_and_typedefs(obj->btf, t->type, +					       &ext->ksym.type_id);  		} else {  			pr_warn("unrecognized extern section '%s'\n", sec_name);  			return -ENOTSUP; @@ -3037,20 +3264,6 @@ static int bpf_object__collect_externs(struct bpf_object *obj)  	return 0;  } -static struct bpf_program * -bpf_object__find_prog_by_idx(struct bpf_object *obj, int idx) -{ -	struct bpf_program *prog; -	size_t i; - -	for (i = 0; i < obj->nr_programs; i++) { -		prog = &obj->programs[i]; -		if (prog->idx == idx) -			return prog; -	} -	return NULL; -} -  struct bpf_program *  bpf_object__find_program_by_title(const struct bpf_object *obj,  				  const char *title) @@ -3058,12 +3271,18 @@ bpf_object__find_program_by_title(const struct bpf_object *obj,  	struct bpf_program *pos;  	bpf_object__for_each_program(pos, obj) { -		if (pos->section_name && !strcmp(pos->section_name, title)) +		if (pos->sec_name && !strcmp(pos->sec_name, title))  			return pos;  	}  	return NULL;  } +static bool prog_is_subprog(const struct bpf_object *obj, +			    const struct bpf_program *prog) +{ +	return prog->sec_idx == obj->efile.text_shndx && obj->has_subcalls; +} +  struct bpf_program *  bpf_object__find_program_by_name(const struct bpf_object *obj,  				 const char *name) @@ -3071,6 +3290,8 @@ bpf_object__find_program_by_name(const struct bpf_object *obj,  	struct bpf_program *prog;  	bpf_object__for_each_program(prog, obj) { +		if (prog_is_subprog(obj, prog)) +			continue;  		if (!strcmp(prog->name, name))  			return prog;  	} @@ -3109,7 +3330,7 @@ bpf_object__section_to_libbpf_map_type(const struct bpf_object *obj, int shndx)  static int bpf_program__record_reloc(struct bpf_program *prog,  				     struct reloc_desc *reloc_desc, -				     __u32 insn_idx, const char *name, +				     __u32 insn_idx, const char *sym_name,  				     const GElf_Sym *sym, const GElf_Rel *rel)  {  	struct bpf_insn *insn = &prog->insns[insn_idx]; @@ -3117,34 +3338,38 @@ static int bpf_program__record_reloc(struct bpf_program *prog,  	struct bpf_object *obj = prog->obj;  	__u32 shdr_idx = sym->st_shndx;  	enum libbpf_map_type type; +	const char *sym_sec_name;  	struct bpf_map *map; +	reloc_desc->processed = false; +  	/* sub-program call relocation */  	if (insn->code == (BPF_JMP | BPF_CALL)) {  		if (insn->src_reg != BPF_PSEUDO_CALL) { -			pr_warn("incorrect bpf_call opcode\n"); +			pr_warn("prog '%s': incorrect bpf_call opcode\n", prog->name);  			return -LIBBPF_ERRNO__RELOC;  		}  		/* text_shndx can be 0, if no default "main" program exists */  		if (!shdr_idx || shdr_idx != obj->efile.text_shndx) { -			pr_warn("bad call relo against section %u\n", shdr_idx); +			sym_sec_name = elf_sec_name(obj, elf_sec_by_idx(obj, shdr_idx)); +			pr_warn("prog '%s': bad call relo against '%s' in section '%s'\n", +				prog->name, sym_name, sym_sec_name);  			return -LIBBPF_ERRNO__RELOC;  		} -		if (sym->st_value % 8) { -			pr_warn("bad call relo offset: %zu\n", -				(size_t)sym->st_value); +		if (sym->st_value % BPF_INSN_SZ) { +			pr_warn("prog '%s': bad call relo against '%s' at offset %zu\n", +				prog->name, sym_name, (size_t)sym->st_value);  			return -LIBBPF_ERRNO__RELOC;  		}  		reloc_desc->type = RELO_CALL;  		reloc_desc->insn_idx = insn_idx;  		reloc_desc->sym_off = 
sym->st_value; -		obj->has_pseudo_calls = true;  		return 0;  	}  	if (insn->code != (BPF_LD | BPF_IMM | BPF_DW)) { -		pr_warn("invalid relo for insns[%d].code 0x%x\n", -			insn_idx, insn->code); +		pr_warn("prog '%s': invalid relo against '%s' for insns[%d].code 0x%x\n", +			prog->name, sym_name, insn_idx, insn->code);  		return -LIBBPF_ERRNO__RELOC;  	} @@ -3159,12 +3384,12 @@ static int bpf_program__record_reloc(struct bpf_program *prog,  				break;  		}  		if (i >= n) { -			pr_warn("extern relo failed to find extern for sym %d\n", -				sym_idx); +			pr_warn("prog '%s': extern relo failed to find extern for '%s' (%d)\n", +				prog->name, sym_name, sym_idx);  			return -LIBBPF_ERRNO__RELOC;  		} -		pr_debug("found extern #%d '%s' (sym %d) for insn %u\n", -			 i, ext->name, ext->sym_idx, insn_idx); +		pr_debug("prog '%s': found extern #%d '%s' (sym %d) for insn #%u\n", +			 prog->name, i, ext->name, ext->sym_idx, insn_idx);  		reloc_desc->type = RELO_EXTERN;  		reloc_desc->insn_idx = insn_idx;  		reloc_desc->sym_off = i; /* sym_off stores extern index */ @@ -3172,18 +3397,19 @@ static int bpf_program__record_reloc(struct bpf_program *prog,  	}  	if (!shdr_idx || shdr_idx >= SHN_LORESERVE) { -		pr_warn("invalid relo for \'%s\' in special section 0x%x; forgot to initialize global var?..\n", -			name, shdr_idx); +		pr_warn("prog '%s': invalid relo against '%s' in special section 0x%x; forgot to initialize global var?..\n", +			prog->name, sym_name, shdr_idx);  		return -LIBBPF_ERRNO__RELOC;  	}  	type = bpf_object__section_to_libbpf_map_type(obj, shdr_idx); +	sym_sec_name = elf_sec_name(obj, elf_sec_by_idx(obj, shdr_idx));  	/* generic map reference relocation */  	if (type == LIBBPF_MAP_UNSPEC) {  		if (!bpf_object__shndx_is_maps(obj, shdr_idx)) { -			pr_warn("bad map relo against section %u\n", -				shdr_idx); +			pr_warn("prog '%s': bad map relo against '%s' in section '%s'\n", +				prog->name, sym_name, sym_sec_name);  			return -LIBBPF_ERRNO__RELOC;  		}  		for (map_idx = 0; map_idx < nr_maps; map_idx++) { @@ -3192,14 +3418,14 @@ static int bpf_program__record_reloc(struct bpf_program *prog,  			    map->sec_idx != sym->st_shndx ||  			    map->sec_offset != sym->st_value)  				continue; -			pr_debug("found map %zd (%s, sec %d, off %zu) for insn %u\n", -				 map_idx, map->name, map->sec_idx, +			pr_debug("prog '%s': found map %zd (%s, sec %d, off %zu) for insn #%u\n", +				 prog->name, map_idx, map->name, map->sec_idx,  				 map->sec_offset, insn_idx);  			break;  		}  		if (map_idx >= nr_maps) { -			pr_warn("map relo failed to find map for sec %u, off %zu\n", -				shdr_idx, (size_t)sym->st_value); +			pr_warn("prog '%s': map relo failed to find map for section '%s', off %zu\n", +				prog->name, sym_sec_name, (size_t)sym->st_value);  			return -LIBBPF_ERRNO__RELOC;  		}  		reloc_desc->type = RELO_LD64; @@ -3211,21 +3437,22 @@ static int bpf_program__record_reloc(struct bpf_program *prog,  	/* global data map relocation */  	if (!bpf_object__shndx_is_data(obj, shdr_idx)) { -		pr_warn("bad data relo against section %u\n", shdr_idx); +		pr_warn("prog '%s': bad data relo against section '%s'\n", +			prog->name, sym_sec_name);  		return -LIBBPF_ERRNO__RELOC;  	}  	for (map_idx = 0; map_idx < nr_maps; map_idx++) {  		map = &obj->maps[map_idx];  		if (map->libbpf_type != type)  			continue; -		pr_debug("found data map %zd (%s, sec %d, off %zu) for insn %u\n", -			 map_idx, map->name, map->sec_idx, map->sec_offset, -			 insn_idx); +		pr_debug("prog '%s': found data map %zd (%s, sec %d, off %zu) 
for insn %u\n",
+			 prog->name, map_idx, map->name, map->sec_idx,
+			 map->sec_offset, insn_idx);
 		break;
 	}
 	if (map_idx >= nr_maps) {
-		pr_warn("data relo failed to find map for sec %u\n",
-			shdr_idx);
+		pr_warn("prog '%s': data relo failed to find map for section '%s'\n",
+			prog->name, sym_sec_name);
 		return -LIBBPF_ERRNO__RELOC;
 	}
@@ -3236,55 +3463,113 @@ static int bpf_program__record_reloc(struct bpf_program *prog,
 	return 0;
 }
 
+static bool prog_contains_insn(const struct bpf_program *prog, size_t insn_idx)
+{
+	return insn_idx >= prog->sec_insn_off &&
+	       insn_idx < prog->sec_insn_off + prog->sec_insn_cnt;
+}
+
+static struct bpf_program *find_prog_by_sec_insn(const struct bpf_object *obj,
+						 size_t sec_idx, size_t insn_idx)
+{
+	int l = 0, r = obj->nr_programs - 1, m;
+	struct bpf_program *prog;
+
+	while (l < r) {
+		m = l + (r - l + 1) / 2;
+		prog = &obj->programs[m];
+
+		if (prog->sec_idx < sec_idx ||
+		    (prog->sec_idx == sec_idx && prog->sec_insn_off <= insn_idx))
+			l = m;
+		else
+			r = m - 1;
+	}
+	/* matching program could be at index l, but it still might be the
+	 * wrong one, so we need to double check conditions for the last time
+	 */
+	prog = &obj->programs[l];
+	if (prog->sec_idx == sec_idx && prog_contains_insn(prog, insn_idx))
+		return prog;
+	return NULL;
+}
+
 static int
-bpf_program__collect_reloc(struct bpf_program *prog, GElf_Shdr *shdr,
-			   Elf_Data *data, struct bpf_object *obj)
+bpf_object__collect_prog_relos(struct bpf_object *obj, GElf_Shdr *shdr, Elf_Data *data)
 {
 	Elf_Data *symbols = obj->efile.symbols;
+	const char *relo_sec_name, *sec_name;
+	size_t sec_idx = shdr->sh_info;
+	struct bpf_program *prog;
+	struct reloc_desc *relos;
 	int err, i, nrels;
+	const char *sym_name;
+	__u32 insn_idx;
+	GElf_Sym sym;
+	GElf_Rel rel;
 
-	pr_debug("collecting relocating info for: '%s'\n", prog->section_name);
-	nrels = shdr->sh_size / shdr->sh_entsize;
+	relo_sec_name = elf_sec_str(obj, shdr->sh_name);
+	sec_name = elf_sec_name(obj, elf_sec_by_idx(obj, sec_idx));
+	if (!relo_sec_name || !sec_name)
+		return -EINVAL;
 
-	prog->reloc_desc = malloc(sizeof(*prog->reloc_desc) * nrels);
-	if (!prog->reloc_desc) {
-		pr_warn("failed to alloc memory in relocation\n");
-		return -ENOMEM;
-	}
-	prog->nr_reloc = nrels;
+	pr_debug("sec '%s': collecting relocation for section(%zu) '%s'\n",
+		 relo_sec_name, sec_idx, sec_name);
+	nrels = shdr->sh_size / shdr->sh_entsize;
 
 	for (i = 0; i < nrels; i++) {
-		const char *name;
-		__u32 insn_idx;
-		GElf_Sym sym;
-		GElf_Rel rel;
-
 		if (!gelf_getrel(data, i, &rel)) {
-			pr_warn("relocation: failed to get %d reloc\n", i);
+			pr_warn("sec '%s': failed to get relo #%d\n", relo_sec_name, i);
 			return -LIBBPF_ERRNO__FORMAT;
 		}
 		if (!gelf_getsym(symbols, GELF_R_SYM(rel.r_info), &sym)) {
-			pr_warn("relocation: symbol %"PRIx64" not found\n",
-				GELF_R_SYM(rel.r_info));
+			pr_warn("sec '%s': symbol 0x%zx not found for relo #%d\n",
+				relo_sec_name, (size_t)GELF_R_SYM(rel.r_info), i);
 			return -LIBBPF_ERRNO__FORMAT;
 		}
-		if (rel.r_offset % sizeof(struct bpf_insn))
+		if (rel.r_offset % BPF_INSN_SZ) {
+			pr_warn("sec '%s': invalid offset 0x%zx for relo #%d\n",
+				relo_sec_name, (size_t)rel.r_offset, i);
 			return -LIBBPF_ERRNO__FORMAT;
+		}
 
-		insn_idx = rel.r_offset / sizeof(struct bpf_insn);
-		name = elf_strptr(obj->efile.elf, obj->efile.strtabidx,
-				  sym.st_name) ? 
: "<?>"; +		insn_idx = rel.r_offset / BPF_INSN_SZ; +		/* relocations against static functions are recorded as +		 * relocations against the section that contains a function; +		 * in such case, symbol will be STT_SECTION and sym.st_name +		 * will point to empty string (0), so fetch section name +		 * instead +		 */ +		if (GELF_ST_TYPE(sym.st_info) == STT_SECTION && sym.st_name == 0) +			sym_name = elf_sec_name(obj, elf_sec_by_idx(obj, sym.st_shndx)); +		else +			sym_name = elf_sym_str(obj, sym.st_name); +		sym_name = sym_name ?: "<?"; + +		pr_debug("sec '%s': relo #%d: insn #%u against '%s'\n", +			 relo_sec_name, i, insn_idx, sym_name); + +		prog = find_prog_by_sec_insn(obj, sec_idx, insn_idx); +		if (!prog) { +			pr_warn("sec '%s': relo #%d: program not found in section '%s' for insn #%u\n", +				relo_sec_name, i, sec_name, insn_idx); +			return -LIBBPF_ERRNO__RELOC; +		} -		pr_debug("relo for shdr %u, symb %zu, value %zu, type %d, bind %d, name %d (\'%s\'), insn %u\n", -			 (__u32)sym.st_shndx, (size_t)GELF_R_SYM(rel.r_info), -			 (size_t)sym.st_value, GELF_ST_TYPE(sym.st_info), -			 GELF_ST_BIND(sym.st_info), sym.st_name, name, -			 insn_idx); +		relos = libbpf_reallocarray(prog->reloc_desc, +					    prog->nr_reloc + 1, sizeof(*relos)); +		if (!relos) +			return -ENOMEM; +		prog->reloc_desc = relos; -		err = bpf_program__record_reloc(prog, &prog->reloc_desc[i], -						insn_idx, name, &sym, &rel); +		/* adjust insn_idx to local BPF program frame of reference */ +		insn_idx -= prog->sec_insn_off; +		err = bpf_program__record_reloc(prog, &relos[prog->nr_reloc], +						insn_idx, sym_name, &sym, &rel);  		if (err)  			return err; + +		prog->nr_reloc++;  	}  	return 0;  } @@ -3433,8 +3718,14 @@ bpf_object__probe_loading(struct bpf_object *obj)  	return 0;  } -static int -bpf_object__probe_name(struct bpf_object *obj) +static int probe_fd(int fd) +{ +	if (fd >= 0) +		close(fd); +	return fd >= 0; +} + +static int probe_kern_prog_name(void)  {  	struct bpf_load_program_attr attr;  	struct bpf_insn insns[] = { @@ -3452,16 +3743,10 @@ bpf_object__probe_name(struct bpf_object *obj)  	attr.license = "GPL";  	attr.name = "test";  	ret = bpf_load_program_xattr(&attr, NULL, 0); -	if (ret >= 0) { -		obj->caps.name = 1; -		close(ret); -	} - -	return 0; +	return probe_fd(ret);  } -static int -bpf_object__probe_global_data(struct bpf_object *obj) +static int probe_kern_global_data(void)  {  	struct bpf_load_program_attr prg_attr;  	struct bpf_create_map_attr map_attr; @@ -3498,16 +3783,23 @@ bpf_object__probe_global_data(struct bpf_object *obj)  	prg_attr.license = "GPL";  	ret = bpf_load_program_xattr(&prg_attr, NULL, 0); -	if (ret >= 0) { -		obj->caps.global_data = 1; -		close(ret); -	} -  	close(map); -	return 0; +	return probe_fd(ret); +} + +static int probe_kern_btf(void) +{ +	static const char strs[] = "\0int"; +	__u32 types[] = { +		/* int */ +		BTF_TYPE_INT_ENC(1, BTF_INT_SIGNED, 0, 32, 4), +	}; + +	return probe_fd(libbpf__load_raw_btf((char *)types, sizeof(types), +					     strs, sizeof(strs)));  } -static int bpf_object__probe_btf_func(struct bpf_object *obj) +static int probe_kern_btf_func(void)  {  	static const char strs[] = "\0int\0x\0a";  	/* void x(int a) {} */ @@ -3520,20 +3812,12 @@ static int bpf_object__probe_btf_func(struct bpf_object *obj)  		/* FUNC x */                                    /* [3] */  		BTF_TYPE_ENC(5, BTF_INFO_ENC(BTF_KIND_FUNC, 0, 0), 2),  	}; -	int btf_fd; -	btf_fd = libbpf__load_raw_btf((char *)types, sizeof(types), -				      strs, sizeof(strs)); -	if (btf_fd 
>= 0) { -		obj->caps.btf_func = 1; -		close(btf_fd); -		return 1; -	} - -	return 0; +	return probe_fd(libbpf__load_raw_btf((char *)types, sizeof(types), +					     strs, sizeof(strs)));  } -static int bpf_object__probe_btf_func_global(struct bpf_object *obj) +static int probe_kern_btf_func_global(void)  {  	static const char strs[] = "\0int\0x\0a";  	/* static void x(int a) {} */ @@ -3546,20 +3830,12 @@ static int bpf_object__probe_btf_func_global(struct bpf_object *obj)  		/* FUNC x BTF_FUNC_GLOBAL */                    /* [3] */  		BTF_TYPE_ENC(5, BTF_INFO_ENC(BTF_KIND_FUNC, 0, BTF_FUNC_GLOBAL), 2),  	}; -	int btf_fd; -	btf_fd = libbpf__load_raw_btf((char *)types, sizeof(types), -				      strs, sizeof(strs)); -	if (btf_fd >= 0) { -		obj->caps.btf_func_global = 1; -		close(btf_fd); -		return 1; -	} - -	return 0; +	return probe_fd(libbpf__load_raw_btf((char *)types, sizeof(types), +					     strs, sizeof(strs)));  } -static int bpf_object__probe_btf_datasec(struct bpf_object *obj) +static int probe_kern_btf_datasec(void)  {  	static const char strs[] = "\0x\0.data";  	/* static int a; */ @@ -3573,20 +3849,12 @@ static int bpf_object__probe_btf_datasec(struct bpf_object *obj)  		BTF_TYPE_ENC(3, BTF_INFO_ENC(BTF_KIND_DATASEC, 0, 1), 4),  		BTF_VAR_SECINFO_ENC(2, 0, 4),  	}; -	int btf_fd; -	btf_fd = libbpf__load_raw_btf((char *)types, sizeof(types), -				      strs, sizeof(strs)); -	if (btf_fd >= 0) { -		obj->caps.btf_datasec = 1; -		close(btf_fd); -		return 1; -	} - -	return 0; +	return probe_fd(libbpf__load_raw_btf((char *)types, sizeof(types), +					     strs, sizeof(strs)));  } -static int bpf_object__probe_array_mmap(struct bpf_object *obj) +static int probe_kern_array_mmap(void)  {  	struct bpf_create_map_attr attr = {  		.map_type = BPF_MAP_TYPE_ARRAY, @@ -3595,27 +3863,17 @@ static int bpf_object__probe_array_mmap(struct bpf_object *obj)  		.value_size = sizeof(int),  		.max_entries = 1,  	}; -	int fd; - -	fd = bpf_create_map_xattr(&attr); -	if (fd >= 0) { -		obj->caps.array_mmap = 1; -		close(fd); -		return 1; -	} -	return 0; +	return probe_fd(bpf_create_map_xattr(&attr));  } -static int -bpf_object__probe_exp_attach_type(struct bpf_object *obj) +static int probe_kern_exp_attach_type(void)  {  	struct bpf_load_program_attr attr;  	struct bpf_insn insns[] = {  		BPF_MOV64_IMM(BPF_REG_0, 0),  		BPF_EXIT_INSN(),  	}; -	int fd;  	memset(&attr, 0, sizeof(attr));  	/* use any valid combination of program type and (optional) @@ -3629,36 +3887,140 @@ bpf_object__probe_exp_attach_type(struct bpf_object *obj)  	attr.insns_cnt = ARRAY_SIZE(insns);  	attr.license = "GPL"; -	fd = bpf_load_program_xattr(&attr, NULL, 0); -	if (fd >= 0) { -		obj->caps.exp_attach_type = 1; -		close(fd); -		return 1; -	} -	return 0; +	return probe_fd(bpf_load_program_xattr(&attr, NULL, 0));  } -static int -bpf_object__probe_caps(struct bpf_object *obj) +static int probe_kern_probe_read_kernel(void) +{ +	struct bpf_load_program_attr attr; +	struct bpf_insn insns[] = { +		BPF_MOV64_REG(BPF_REG_1, BPF_REG_10),	/* r1 = r10 (fp) */ +		BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -8),	/* r1 += -8 */ +		BPF_MOV64_IMM(BPF_REG_2, 8),		/* r2 = 8 */ +		BPF_MOV64_IMM(BPF_REG_3, 0),		/* r3 = 0 */ +		BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_probe_read_kernel), +		BPF_EXIT_INSN(), +	}; + +	memset(&attr, 0, sizeof(attr)); +	attr.prog_type = BPF_PROG_TYPE_KPROBE; +	attr.insns = insns; +	attr.insns_cnt = ARRAY_SIZE(insns); +	attr.license = "GPL"; + +	return probe_fd(bpf_load_program_xattr(&attr, NULL, 0)); +} + +static int 
probe_prog_bind_map(void)  { -	int (*probe_fn[])(struct bpf_object *obj) = { -		bpf_object__probe_name, -		bpf_object__probe_global_data, -		bpf_object__probe_btf_func, -		bpf_object__probe_btf_func_global, -		bpf_object__probe_btf_datasec, -		bpf_object__probe_array_mmap, -		bpf_object__probe_exp_attach_type, +	struct bpf_load_program_attr prg_attr; +	struct bpf_create_map_attr map_attr; +	char *cp, errmsg[STRERR_BUFSIZE]; +	struct bpf_insn insns[] = { +		BPF_MOV64_IMM(BPF_REG_0, 0), +		BPF_EXIT_INSN(),  	}; -	int i, ret; +	int ret, map, prog; -	for (i = 0; i < ARRAY_SIZE(probe_fn); i++) { -		ret = probe_fn[i](obj); -		if (ret < 0) -			pr_debug("Probe #%d failed with %d.\n", i, ret); +	memset(&map_attr, 0, sizeof(map_attr)); +	map_attr.map_type = BPF_MAP_TYPE_ARRAY; +	map_attr.key_size = sizeof(int); +	map_attr.value_size = 32; +	map_attr.max_entries = 1; + +	map = bpf_create_map_xattr(&map_attr); +	if (map < 0) { +		ret = -errno; +		cp = libbpf_strerror_r(ret, errmsg, sizeof(errmsg)); +		pr_warn("Error in %s():%s(%d). Couldn't create simple array map.\n", +			__func__, cp, -ret); +		return ret;  	} -	return 0; +	memset(&prg_attr, 0, sizeof(prg_attr)); +	prg_attr.prog_type = BPF_PROG_TYPE_SOCKET_FILTER; +	prg_attr.insns = insns; +	prg_attr.insns_cnt = ARRAY_SIZE(insns); +	prg_attr.license = "GPL"; + +	prog = bpf_load_program_xattr(&prg_attr, NULL, 0); +	if (prog < 0) { +		close(map); +		return 0; +	} + +	ret = bpf_prog_bind_map(prog, map, NULL); + +	close(map); +	close(prog); + +	return ret >= 0; +} + +enum kern_feature_result { +	FEAT_UNKNOWN = 0, +	FEAT_SUPPORTED = 1, +	FEAT_MISSING = 2, +}; + +typedef int (*feature_probe_fn)(void); + +static struct kern_feature_desc { +	const char *desc; +	feature_probe_fn probe; +	enum kern_feature_result res; +} feature_probes[__FEAT_CNT] = { +	[FEAT_PROG_NAME] = { +		"BPF program name", probe_kern_prog_name, +	}, +	[FEAT_GLOBAL_DATA] = { +		"global variables", probe_kern_global_data, +	}, +	[FEAT_BTF] = { +		"minimal BTF", probe_kern_btf, +	}, +	[FEAT_BTF_FUNC] = { +		"BTF functions", probe_kern_btf_func, +	}, +	[FEAT_BTF_GLOBAL_FUNC] = { +		"BTF global function", probe_kern_btf_func_global, +	}, +	[FEAT_BTF_DATASEC] = { +		"BTF data section and variable", probe_kern_btf_datasec, +	}, +	[FEAT_ARRAY_MMAP] = { +		"ARRAY map mmap()", probe_kern_array_mmap, +	}, +	[FEAT_EXP_ATTACH_TYPE] = { +		"BPF_PROG_LOAD expected_attach_type attribute", +		probe_kern_exp_attach_type, +	}, +	[FEAT_PROBE_READ_KERN] = { +		"bpf_probe_read_kernel() helper", probe_kern_probe_read_kernel, +	}, +	[FEAT_PROG_BIND_MAP] = { +		"BPF_PROG_BIND_MAP support", probe_prog_bind_map, +	} +}; + +static bool kernel_supports(enum kern_feature_id feat_id) +{ +	struct kern_feature_desc *feat = &feature_probes[feat_id]; +	int ret; + +	if (READ_ONCE(feat->res) == FEAT_UNKNOWN) { +		ret = feat->probe(); +		if (ret > 0) { +			WRITE_ONCE(feat->res, FEAT_SUPPORTED); +		} else if (ret == 0) { +			WRITE_ONCE(feat->res, FEAT_MISSING); +		} else { +			pr_warn("Detection of kernel %s support failed: %d\n", feat->desc, ret); +			WRITE_ONCE(feat->res, FEAT_MISSING); +		} +	} + +	return READ_ONCE(feat->res) == FEAT_SUPPORTED;  }  static bool map_is_reuse_compat(const struct bpf_map *map, int map_fd) @@ -3760,7 +4122,7 @@ static int bpf_object__create_map(struct bpf_object *obj, struct bpf_map *map)  	memset(&create_attr, 0, sizeof(create_attr)); -	if (obj->caps.name) +	if (kernel_supports(FEAT_PROG_NAME))  		create_attr.name = map->name;  	create_attr.map_ifindex = map->map_ifindex;  	
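 	/* (Editor's note, not in the original diff: kernel_supports() above
 	 * memoizes each probe's result in feature_probes[], so this
 	 * FEAT_PROG_NAME check runs its feature probe at most once per
 	 * process, not once per created map.)
 	 */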
create_attr.map_type = def->type; @@ -3841,6 +4203,36 @@ static int bpf_object__create_map(struct bpf_object *obj, struct bpf_map *map)  	return 0;  } +static int init_map_slots(struct bpf_map *map) +{ +	const struct bpf_map *targ_map; +	unsigned int i; +	int fd, err; + +	for (i = 0; i < map->init_slots_sz; i++) { +		if (!map->init_slots[i]) +			continue; + +		targ_map = map->init_slots[i]; +		fd = bpf_map__fd(targ_map); +		err = bpf_map_update_elem(map->fd, &i, &fd, 0); +		if (err) { +			err = -errno; +			pr_warn("map '%s': failed to initialize slot [%d] to map '%s' fd=%d: %d\n", +				map->name, i, targ_map->name, +				fd, err); +			return err; +		} +		pr_debug("map '%s': slot [%d] set to map '%s' fd=%d\n", +			 map->name, i, targ_map->name, fd); +	} + +	zfree(&map->init_slots); +	map->init_slots_sz = 0; + +	return 0; +} +  static int  bpf_object__create_maps(struct bpf_object *obj)  { @@ -3864,47 +4256,29 @@ bpf_object__create_maps(struct bpf_object *obj)  		if (map->fd >= 0) {  			pr_debug("map '%s': skipping creation (preset fd=%d)\n",  				 map->name, map->fd); -			continue; -		} - -		err = bpf_object__create_map(obj, map); -		if (err) -			goto err_out; - -		pr_debug("map '%s': created successfully, fd=%d\n", map->name, -			 map->fd); - -		if (bpf_map__is_internal(map)) { -			err = bpf_object__populate_internal_map(obj, map); -			if (err < 0) { -				zclose(map->fd); +		} else { +			err = bpf_object__create_map(obj, map); +			if (err)  				goto err_out; -			} -		} -		if (map->init_slots_sz) { -			for (j = 0; j < map->init_slots_sz; j++) { -				const struct bpf_map *targ_map; -				int fd; +			pr_debug("map '%s': created successfully, fd=%d\n", +				 map->name, map->fd); -				if (!map->init_slots[j]) -					continue; +			if (bpf_map__is_internal(map)) { +				err = bpf_object__populate_internal_map(obj, map); +				if (err < 0) { +					zclose(map->fd); +					goto err_out; +				} +			} -				targ_map = map->init_slots[j]; -				fd = bpf_map__fd(targ_map); -				err = bpf_map_update_elem(map->fd, &j, &fd, 0); -				if (err) { -					err = -errno; -					pr_warn("map '%s': failed to initialize slot [%d] to map '%s' fd=%d: %d\n", -						map->name, j, targ_map->name, -						fd, err); +			if (map->init_slots_sz) { +				err = init_map_slots(map); +				if (err < 0) { +					zclose(map->fd);  					goto err_out;  				} -				pr_debug("map '%s': slot [%d] set to map '%s' fd=%d\n", -					 map->name, j, targ_map->name, fd);  			} -			zfree(&map->init_slots); -			map->init_slots_sz = 0;  		}  		if (map->pin_path && !map->pinned) { @@ -3929,75 +4303,6 @@ err_out:  	return err;  } -static int -check_btf_ext_reloc_err(struct bpf_program *prog, int err, -			void *btf_prog_info, const char *info_name) -{ -	if (err != -ENOENT) { -		pr_warn("Error in loading %s for sec %s.\n", -			info_name, prog->section_name); -		return err; -	} - -	/* err == -ENOENT (i.e. prog->section_name not found in btf_ext) */ - -	if (btf_prog_info) { -		/* -		 * Some info has already been found but has problem -		 * in the last btf_ext reloc. Must have to error out. -		 */ -		pr_warn("Error in relocating %s for sec %s.\n", -			info_name, prog->section_name); -		return err; -	} - -	/* Have problem loading the very first info. Ignore the rest. */ -	pr_warn("Cannot find %s for main program sec %s. 
Ignore all %s.\n", -		info_name, prog->section_name, info_name); -	return 0; -} - -static int -bpf_program_reloc_btf_ext(struct bpf_program *prog, struct bpf_object *obj, -			  const char *section_name,  __u32 insn_offset) -{ -	int err; - -	if (!insn_offset || prog->func_info) { -		/* -		 * !insn_offset => main program -		 * -		 * For sub prog, the main program's func_info has to -		 * be loaded first (i.e. prog->func_info != NULL) -		 */ -		err = btf_ext__reloc_func_info(obj->btf, obj->btf_ext, -					       section_name, insn_offset, -					       &prog->func_info, -					       &prog->func_info_cnt); -		if (err) -			return check_btf_ext_reloc_err(prog, err, -						       prog->func_info, -						       "bpf_func_info"); - -		prog->func_info_rec_size = btf_ext__func_info_rec_size(obj->btf_ext); -	} - -	if (!insn_offset || prog->line_info) { -		err = btf_ext__reloc_line_info(obj->btf, obj->btf_ext, -					       section_name, insn_offset, -					       &prog->line_info, -					       &prog->line_info_cnt); -		if (err) -			return check_btf_ext_reloc_err(prog, err, -						       prog->line_info, -						       "bpf_line_info"); - -		prog->line_info_rec_size = btf_ext__line_info_rec_size(obj->btf_ext); -	} - -	return 0; -} -  #define BPF_CORE_SPEC_MAX_LEN 64  /* represents BPF CO-RE field or array element accessor */ @@ -4011,6 +4316,10 @@ struct bpf_core_spec {  	const struct btf *btf;  	/* high-level spec: named fields and array indices only */  	struct bpf_core_accessor spec[BPF_CORE_SPEC_MAX_LEN]; +	/* original unresolved (no skip_mods_or_typedefs) root type ID */ +	__u32 root_type_id; +	/* CO-RE relocation kind */ +	enum bpf_core_relo_kind relo_kind;  	/* high-level spec length */  	int len;  	/* raw, low-level spec: 1-to-1 with accessor spec string */ @@ -4041,8 +4350,66 @@ static bool is_flex_arr(const struct btf *btf,  	return acc->idx == btf_vlen(t) - 1;  } +static const char *core_relo_kind_str(enum bpf_core_relo_kind kind) +{ +	switch (kind) { +	case BPF_FIELD_BYTE_OFFSET: return "byte_off"; +	case BPF_FIELD_BYTE_SIZE: return "byte_sz"; +	case BPF_FIELD_EXISTS: return "field_exists"; +	case BPF_FIELD_SIGNED: return "signed"; +	case BPF_FIELD_LSHIFT_U64: return "lshift_u64"; +	case BPF_FIELD_RSHIFT_U64: return "rshift_u64"; +	case BPF_TYPE_ID_LOCAL: return "local_type_id"; +	case BPF_TYPE_ID_TARGET: return "target_type_id"; +	case BPF_TYPE_EXISTS: return "type_exists"; +	case BPF_TYPE_SIZE: return "type_size"; +	case BPF_ENUMVAL_EXISTS: return "enumval_exists"; +	case BPF_ENUMVAL_VALUE: return "enumval_value"; +	default: return "unknown"; +	} +} + +static bool core_relo_is_field_based(enum bpf_core_relo_kind kind) +{ +	switch (kind) { +	case BPF_FIELD_BYTE_OFFSET: +	case BPF_FIELD_BYTE_SIZE: +	case BPF_FIELD_EXISTS: +	case BPF_FIELD_SIGNED: +	case BPF_FIELD_LSHIFT_U64: +	case BPF_FIELD_RSHIFT_U64: +		return true; +	default: +		return false; +	} +} + +static bool core_relo_is_type_based(enum bpf_core_relo_kind kind) +{ +	switch (kind) { +	case BPF_TYPE_ID_LOCAL: +	case BPF_TYPE_ID_TARGET: +	case BPF_TYPE_EXISTS: +	case BPF_TYPE_SIZE: +		return true; +	default: +		return false; +	} +} + +static bool core_relo_is_enumval_based(enum bpf_core_relo_kind kind) +{ +	switch (kind) { +	case BPF_ENUMVAL_EXISTS: +	case BPF_ENUMVAL_VALUE: +		return true; +	default: +		return false; +	} +} +  /* - * Turn bpf_field_reloc into a low- and high-level spec representation, + * Turn bpf_core_relo into a low- and high-level spec representation,   * validating correctness along the way, as well as calculating 
resulting
 * field bit offset, specified by accessor string. Low-level spec captures
 * every single level of nestedness, including traversing anonymous
@@ -4071,10 +4438,17 @@ static bool is_flex_arr(const struct btf *btf,
  *   - field 'a' access (corresponds to '2' in low-level spec);
  *   - array element #3 access (corresponds to '3' in low-level spec).
  *
+ * Type-based relocations (TYPE_EXISTS/TYPE_SIZE,
+ * TYPE_ID_LOCAL/TYPE_ID_TARGET) don't capture any field information. Their
+ * spec and raw_spec are kept empty.
+ *
+ * Enum value-based relocations (ENUMVAL_EXISTS/ENUMVAL_VALUE) use the access
+ * string to specify the enumerator's value index that needs to be relocated.
  */
-static int bpf_core_spec_parse(const struct btf *btf,
+static int bpf_core_parse_spec(const struct btf *btf,
 			       __u32 type_id,
 			       const char *spec_str,
+			       enum bpf_core_relo_kind relo_kind,
 			       struct bpf_core_spec *spec)
 {
 	int access_idx, parsed_len, i;
@@ -4089,6 +4463,15 @@ static int bpf_core_spec_parse(const struct btf *btf,
 	memset(spec, 0, sizeof(*spec));
 	spec->btf = btf;
+	spec->root_type_id = type_id;
+	spec->relo_kind = relo_kind;
+
+	/* type-based relocations don't have a field access string */
+	if (core_relo_is_type_based(relo_kind)) {
+		if (strcmp(spec_str, "0"))
+			return -EINVAL;
+		return 0;
+	}
 
 	/* parse spec_str="0:1:2:3:4" into array raw_spec=[0, 1, 2, 3, 4] */
 	while (*spec_str) {
@@ -4105,16 +4488,28 @@ static int bpf_core_spec_parse(const struct btf *btf,
 	if (spec->raw_len == 0)
 		return -EINVAL;
 
-	/* first spec value is always reloc type array index */
 	t = skip_mods_and_typedefs(btf, type_id, &id);
 	if (!t)
 		return -EINVAL;
 
 	access_idx = spec->raw_spec[0];
-	spec->spec[0].type_id = id;
-	spec->spec[0].idx = access_idx;
+	acc = &spec->spec[0];
+	acc->type_id = id;
+	acc->idx = access_idx;
 	spec->len++;
 
+	if (core_relo_is_enumval_based(relo_kind)) {
+		if (!btf_is_enum(t) || spec->raw_len > 1 || access_idx >= btf_vlen(t))
+			return -EINVAL;
+
+		/* record enumerator name in the first accessor */
+		acc->name = btf__name_by_offset(btf, btf_enum(t)[access_idx].name_off);
+		return 0;
+	}
+
+	if (!core_relo_is_field_based(relo_kind))
+		return -EINVAL;
+
 	sz = btf__resolve_size(btf, id);
 	if (sz < 0)
 		return sz;
@@ -4172,8 +4567,8 @@ static int bpf_core_spec_parse(const struct btf *btf,
 				return sz;
 			spec->bit_offset += access_idx * sz * 8;
 		} else {
-			pr_warn("relo for [%u] %s (at idx %d) captures type [%d] of unexpected kind %d\n",
-				type_id, spec_str, i, id, btf_kind(t));
+			pr_warn("relo for [%u] %s (at idx %d) captures type [%d] of unexpected kind %s\n",
+				type_id, spec_str, i, id, btf_kind_str(t));
 			return -EINVAL;
 		}
 	}
@@ -4223,16 +4618,16 @@ static struct ids_vec *bpf_core_find_cands(const struct btf *local_btf,
 {
 	size_t local_essent_len, targ_essent_len;
 	const char *local_name, *targ_name;
-	const struct btf_type *t;
+	const struct btf_type *t, *local_t;
 	struct ids_vec *cand_ids;
 	__u32 *new_ids;
 	int i, err, n;
 
-	t = btf__type_by_id(local_btf, local_type_id);
-	if (!t)
+	local_t = btf__type_by_id(local_btf, local_type_id);
+	if (!local_t)
 		return ERR_PTR(-EINVAL);
 
-	local_name = btf__name_by_offset(local_btf, t->name_off);
+	local_name = btf__name_by_offset(local_btf, local_t->name_off);
 	if (str_is_empty(local_name))
 		return ERR_PTR(-EINVAL);
 	local_essent_len = bpf_core_essential_name_len(local_name);
@@ -4244,12 +4639,11 @@ static struct ids_vec *bpf_core_find_cands(const struct btf 
*local_btf,
 	n = btf__get_nr_types(targ_btf);
 	for (i = 1; i <= n; i++) {
 		t = btf__type_by_id(targ_btf, i);
-		targ_name = btf__name_by_offset(targ_btf, t->name_off);
-		if (str_is_empty(targ_name))
+		if (btf_kind(t) != btf_kind(local_t))
 			continue;
 
-		t = skip_mods_and_typedefs(targ_btf, i, NULL);
-		if (!btf_is_composite(t) && !btf_is_array(t))
+		targ_name = btf__name_by_offset(targ_btf, t->name_off);
+		if (str_is_empty(targ_name))
 			continue;
 
 		targ_essent_len = bpf_core_essential_name_len(targ_name);
@@ -4257,11 +4651,12 @@ static struct ids_vec *bpf_core_find_cands(const struct btf *local_btf,
 			continue;
 
 		if (strncmp(local_name, targ_name, local_essent_len) == 0) {
-			pr_debug("[%d] %s: found candidate [%d] %s\n",
-				 local_type_id, local_name, i, targ_name);
-			new_ids = reallocarray(cand_ids->data,
-					       cand_ids->len + 1,
-					       sizeof(*cand_ids->data));
+			pr_debug("CO-RE relocating [%d] %s %s: found target candidate [%d] %s %s\n",
+				 local_type_id, btf_kind_str(local_t),
+				 local_name, i, btf_kind_str(t), targ_name);
+			new_ids = libbpf_reallocarray(cand_ids->data,
+						      cand_ids->len + 1,
+						      sizeof(*cand_ids->data));
 			if (!new_ids) {
 				err = -ENOMEM;
 				goto err_out;
@@ -4276,8 +4671,9 @@ err_out:
 	return ERR_PTR(err);
 }
 
-/* Check two types for compatibility, skipping const/volatile/restrict and
- * typedefs, to ensure we are relocating compatible entities:
+/* Check two types for compatibility for the purpose of field access
+ * relocation. const/volatile/restrict and typedefs are skipped to ensure we
+ * are relocating semantically compatible entities:
  *   - any two STRUCTs/UNIONs are compatible and can be mixed;
  *   - any two FWDs are compatible, if their names match (modulo flavor suffix);
  *   - any two PTRs are always compatible;
@@ -4432,6 +4828,100 @@ static int bpf_core_match_member(const struct btf *local_btf,
 	return 0;
 }
 
+/* Check local and target types for compatibility. This check is used for
+ * type-based CO-RE relocations and follows slightly different rules than
+ * field-based relocations. This function assumes that root types were already
+ * checked for name match. Beyond that initial root-level name check, names
+ * are completely ignored. Compatibility rules are as follows:
+ *   - any two STRUCTs/UNIONs/FWDs/ENUMs/INTs are considered compatible, but
+ *     kind should match for local and target types (i.e., STRUCT is not
+ *     compatible with UNION);
+ *   - for ENUMs, the size is ignored;
+ *   - for INT, size and signedness are ignored;
+ *   - for ARRAY, dimensionality is ignored, element types are checked for
+ *     compatibility recursively;
+ *   - CONST/VOLATILE/RESTRICT modifiers are ignored;
+ *   - TYPEDEFs/PTRs are compatible if the types they point to are compatible;
+ *   - FUNC_PROTOs are compatible if they have compatible signatures: same
+ *     number of input args and compatible return and argument types.
+ * These rules are not set in stone and probably will be adjusted as we get
+ * more experience with using BPF CO-RE relocations. 
+ */ +static int bpf_core_types_are_compat(const struct btf *local_btf, __u32 local_id, +				     const struct btf *targ_btf, __u32 targ_id) +{ +	const struct btf_type *local_type, *targ_type; +	int depth = 32; /* max recursion depth */ + +	/* caller made sure that names match (ignoring flavor suffix) */ +	local_type = btf__type_by_id(local_btf, local_id); +	targ_type = btf__type_by_id(targ_btf, targ_id); +	if (btf_kind(local_type) != btf_kind(targ_type)) +		return 0; + +recur: +	depth--; +	if (depth < 0) +		return -EINVAL; + +	local_type = skip_mods_and_typedefs(local_btf, local_id, &local_id); +	targ_type = skip_mods_and_typedefs(targ_btf, targ_id, &targ_id); +	if (!local_type || !targ_type) +		return -EINVAL; + +	if (btf_kind(local_type) != btf_kind(targ_type)) +		return 0; + +	switch (btf_kind(local_type)) { +	case BTF_KIND_UNKN: +	case BTF_KIND_STRUCT: +	case BTF_KIND_UNION: +	case BTF_KIND_ENUM: +	case BTF_KIND_FWD: +		return 1; +	case BTF_KIND_INT: +		/* just reject deprecated bitfield-like integers; all other +		 * integers are by default compatible between each other +		 */ +		return btf_int_offset(local_type) == 0 && btf_int_offset(targ_type) == 0; +	case BTF_KIND_PTR: +		local_id = local_type->type; +		targ_id = targ_type->type; +		goto recur; +	case BTF_KIND_ARRAY: +		local_id = btf_array(local_type)->type; +		targ_id = btf_array(targ_type)->type; +		goto recur; +	case BTF_KIND_FUNC_PROTO: { +		struct btf_param *local_p = btf_params(local_type); +		struct btf_param *targ_p = btf_params(targ_type); +		__u16 local_vlen = btf_vlen(local_type); +		__u16 targ_vlen = btf_vlen(targ_type); +		int i, err; + +		if (local_vlen != targ_vlen) +			return 0; + +		for (i = 0; i < local_vlen; i++, local_p++, targ_p++) { +			skip_mods_and_typedefs(local_btf, local_p->type, &local_id); +			skip_mods_and_typedefs(targ_btf, targ_p->type, &targ_id); +			err = bpf_core_types_are_compat(local_btf, local_id, targ_btf, targ_id); +			if (err <= 0) +				return err; +		} + +		/* tail recurse for return type check */ +		skip_mods_and_typedefs(local_btf, local_type->type, &local_id); +		skip_mods_and_typedefs(targ_btf, targ_type->type, &targ_id); +		goto recur; +	} +	default: +		pr_warn("unexpected kind %s relocated, local [%d], target [%d]\n", +			btf_kind_str(local_type), local_id, targ_id); +		return 0; +	} +} +  /*   * Try to match local spec to a target type and, if successful, produce full   * target spec (high-level, low-level + bit offset). 
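[Editor's sketch, not part of the commit] The type compatibility check above backs the new type- and enum-value-based CO-RE relocation kinds. From BPF C code these relocations are normally emitted through the convenience wrappers that this same series adds to tools/lib/bpf/bpf_core_read.h (bpf_core_type_exists(), bpf_core_type_size(), bpf_core_enum_value_exists(), bpf_core_enum_value()); a vmlinux.h dump of kernel types is assumed to be available:

	#include "vmlinux.h"
	#include <bpf/bpf_helpers.h>
	#include <bpf/bpf_core_read.h>

	SEC("raw_tp/sched_switch")
	int demo_type_relos(void *ctx)
	{
		long sz = 0;

		/* compiles into a BPF_TYPE_EXISTS relocation against kernel BTF */
		if (bpf_core_type_exists(struct task_struct))
			/* compiles into a BPF_TYPE_SIZE relocation */
			sz = bpf_core_type_size(struct task_struct);

		/* BPF_ENUMVAL_EXISTS / BPF_ENUMVAL_VALUE relocations */
		if (bpf_core_enum_value_exists(enum bpf_func_id,
					       BPF_FUNC_probe_read_kernel))
			sz += bpf_core_enum_value(enum bpf_func_id,
						  BPF_FUNC_probe_read_kernel);

		return sz != 0;
	}

	char LICENSE[] SEC("license") = "GPL";

Each wrapper compiles down to a single relocatable constant that bpf_core_calc_relo() below patches at load time; the *_EXISTS forms evaluate to 0 when the target type or enumerator is missing, so the guarded code can be eliminated as dead code by the verifier.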
@@ -4447,10 +4937,51 @@ static int bpf_core_spec_match(struct bpf_core_spec *local_spec,  	memset(targ_spec, 0, sizeof(*targ_spec));  	targ_spec->btf = targ_btf; +	targ_spec->root_type_id = targ_id; +	targ_spec->relo_kind = local_spec->relo_kind; + +	if (core_relo_is_type_based(local_spec->relo_kind)) { +		return bpf_core_types_are_compat(local_spec->btf, +						 local_spec->root_type_id, +						 targ_btf, targ_id); +	}  	local_acc = &local_spec->spec[0];  	targ_acc = &targ_spec->spec[0]; +	if (core_relo_is_enumval_based(local_spec->relo_kind)) { +		size_t local_essent_len, targ_essent_len; +		const struct btf_enum *e; +		const char *targ_name; + +		/* has to resolve to an enum */ +		targ_type = skip_mods_and_typedefs(targ_spec->btf, targ_id, &targ_id); +		if (!btf_is_enum(targ_type)) +			return 0; + +		local_essent_len = bpf_core_essential_name_len(local_acc->name); + +		for (i = 0, e = btf_enum(targ_type); i < btf_vlen(targ_type); i++, e++) { +			targ_name = btf__name_by_offset(targ_spec->btf, e->name_off); +			targ_essent_len = bpf_core_essential_name_len(targ_name); +			if (targ_essent_len != local_essent_len) +				continue; +			if (strncmp(local_acc->name, targ_name, local_essent_len) == 0) { +				targ_acc->type_id = targ_id; +				targ_acc->idx = i; +				targ_acc->name = targ_name; +				targ_spec->len++; +				targ_spec->raw_spec[targ_spec->raw_len] = targ_acc->idx; +				targ_spec->raw_len++; +				return 1; +			} +		} +		return 0; +	} + +	if (!core_relo_is_field_based(local_spec->relo_kind)) +		return -EINVAL; +  	for (i = 0; i < local_spec->len; i++, local_acc++, targ_acc++) {  		targ_type = skip_mods_and_typedefs(targ_spec->btf, targ_id,  						   &targ_id); @@ -4507,22 +5038,42 @@ static int bpf_core_spec_match(struct bpf_core_spec *local_spec,  }  static int bpf_core_calc_field_relo(const struct bpf_program *prog, -				    const struct bpf_field_reloc *relo, +				    const struct bpf_core_relo *relo,  				    const struct bpf_core_spec *spec, -				    __u32 *val, bool *validate) +				    __u32 *val, __u32 *field_sz, __u32 *type_id, +				    bool *validate)  { -	const struct bpf_core_accessor *acc = &spec->spec[spec->len - 1]; -	const struct btf_type *t = btf__type_by_id(spec->btf, acc->type_id); -	__u32 byte_off, byte_sz, bit_off, bit_sz; +	const struct bpf_core_accessor *acc; +	const struct btf_type *t; +	__u32 byte_off, byte_sz, bit_off, bit_sz, field_type_id;  	const struct btf_member *m;  	const struct btf_type *mt;  	bool bitfield;  	__s64 sz; +	*field_sz = 0; + +	if (relo->kind == BPF_FIELD_EXISTS) { +		*val = spec ? 
1 : 0; +		return 0; +	} + +	if (!spec) +		return -EUCLEAN; /* request instruction poisoning */ + +	acc = &spec->spec[spec->len - 1]; +	t = btf__type_by_id(spec->btf, acc->type_id); +  	/* a[n] accessor needs special handling */  	if (!acc->name) {  		if (relo->kind == BPF_FIELD_BYTE_OFFSET) {  			*val = spec->bit_offset / 8; +			/* remember field size for load/store mem size */ +			sz = btf__resolve_size(spec->btf, acc->type_id); +			if (sz < 0) +				return -EINVAL; +			*field_sz = sz; +			*type_id = acc->type_id;  		} else if (relo->kind == BPF_FIELD_BYTE_SIZE) {  			sz = btf__resolve_size(spec->btf, acc->type_id);  			if (sz < 0) @@ -4530,8 +5081,7 @@ static int bpf_core_calc_field_relo(const struct bpf_program *prog,  			*val = sz;  		} else {  			pr_warn("prog '%s': relo %d at insn #%d can't be applied to array access\n", -				bpf_program__title(prog, false), -				relo->kind, relo->insn_off / 8); +				prog->name, relo->kind, relo->insn_off / 8);  			return -EINVAL;  		}  		if (validate) @@ -4540,7 +5090,7 @@ static int bpf_core_calc_field_relo(const struct bpf_program *prog,  	}  	m = btf_members(t) + acc->idx; -	mt = skip_mods_and_typedefs(spec->btf, m->type, NULL); +	mt = skip_mods_and_typedefs(spec->btf, m->type, &field_type_id);  	bit_off = spec->bit_offset;  	bit_sz = btf_member_bitfield_size(t, acc->idx); @@ -4553,15 +5103,14 @@ static int bpf_core_calc_field_relo(const struct bpf_program *prog,  			if (byte_sz >= 8) {  				/* bitfield can't be read with 64-bit read */  				pr_warn("prog '%s': relo %d at insn #%d can't be satisfied for bitfield\n", -					bpf_program__title(prog, false), -					relo->kind, relo->insn_off / 8); +					prog->name, relo->kind, relo->insn_off / 8);  				return -E2BIG;  			}  			byte_sz *= 2;  			byte_off = bit_off / 8 / byte_sz * byte_sz;  		}  	} else { -		sz = btf__resolve_size(spec->btf, m->type); +		sz = btf__resolve_size(spec->btf, field_type_id);  		if (sz < 0)  			return -EINVAL;  		byte_sz = sz; @@ -4579,6 +5128,10 @@ static int bpf_core_calc_field_relo(const struct bpf_program *prog,  	switch (relo->kind) {  	case BPF_FIELD_BYTE_OFFSET:  		*val = byte_off; +		if (!bitfield) { +			*field_sz = byte_sz; +			*type_id = field_type_id; +		}  		break;  	case BPF_FIELD_BYTE_SIZE:  		*val = byte_sz; @@ -4604,117 +5157,384 @@ static int bpf_core_calc_field_relo(const struct bpf_program *prog,  		break;  	case BPF_FIELD_EXISTS:  	default: -		pr_warn("prog '%s': unknown relo %d at insn #%d\n", -			bpf_program__title(prog, false), -			relo->kind, relo->insn_off / 8); -		return -EINVAL; +		return -EOPNOTSUPP;  	}  	return 0;  } +static int bpf_core_calc_type_relo(const struct bpf_core_relo *relo, +				   const struct bpf_core_spec *spec, +				   __u32 *val) +{ +	__s64 sz; + +	/* type-based relos return zero when target type is not found */ +	if (!spec) { +		*val = 0; +		return 0; +	} + +	switch (relo->kind) { +	case BPF_TYPE_ID_TARGET: +		*val = spec->root_type_id; +		break; +	case BPF_TYPE_EXISTS: +		*val = 1; +		break; +	case BPF_TYPE_SIZE: +		sz = btf__resolve_size(spec->btf, spec->root_type_id); +		if (sz < 0) +			return -EINVAL; +		*val = sz; +		break; +	case BPF_TYPE_ID_LOCAL: +	/* BPF_TYPE_ID_LOCAL is handled specially and shouldn't get here */ +	default: +		return -EOPNOTSUPP; +	} + +	return 0; +} + +static int bpf_core_calc_enumval_relo(const struct bpf_core_relo *relo, +				      const struct bpf_core_spec *spec, +				      __u32 *val) +{ +	const struct btf_type *t; +	const struct btf_enum *e; + +	switch (relo->kind) { +	case BPF_ENUMVAL_EXISTS: +		
*val = spec ? 1 : 0;
+		break;
+	case BPF_ENUMVAL_VALUE:
+		if (!spec)
+			return -EUCLEAN; /* request instruction poisoning */
+		t = btf__type_by_id(spec->btf, spec->spec[0].type_id);
+		e = btf_enum(t) + spec->spec[0].idx;
+		*val = e->val;
+		break;
+	default:
+		return -EOPNOTSUPP;
+	}
+
+	return 0;
+}
+
+struct bpf_core_relo_res
+{
+	/* expected value in the instruction, unless validate == false */
+	__u32 orig_val;
+	/* new value that needs to be patched up to */
+	__u32 new_val;
+	/* relocation unsuccessful, poison instruction, but don't fail load */
+	bool poison;
+	/* some relocations can't be validated against orig_val */
+	bool validate;
+	/* for field byte offset relocations of the forms:
+	 *     *(T *)(rX + <off>) = rY
+	 *     rX = *(T *)(rY + <off>),
+	 * we remember original and resolved field size to adjust direct
+	 * memory loads of pointers and integers; this is necessary for 32-bit
+	 * host kernel architectures, but it also allows us to automatically
+	 * relocate fields that were resized from, e.g., u32 to u64, etc.
+	 */
+	bool fail_memsz_adjust;
+	__u32 orig_sz;
+	__u32 orig_type_id;
+	__u32 new_sz;
+	__u32 new_type_id;
+};
+
+/* Calculate original and target relocation values, given local and target
+ * specs and relocation kind. These values are calculated for each candidate.
+ * If there are multiple candidates, resulting values should all be consistent
+ * with each other. Otherwise, libbpf will refuse to proceed due to ambiguity.
+ * If the instruction has to be poisoned, res->poison will be set to true.
+ */
+static int bpf_core_calc_relo(const struct bpf_program *prog,
+			      const struct bpf_core_relo *relo,
+			      int relo_idx,
+			      const struct bpf_core_spec *local_spec,
+			      const struct bpf_core_spec *targ_spec,
+			      struct bpf_core_relo_res *res)
+{
+	int err = -EOPNOTSUPP;
+
+	res->orig_val = 0;
+	res->new_val = 0;
+	res->poison = false;
+	res->validate = true;
+	res->fail_memsz_adjust = false;
+	res->orig_sz = res->new_sz = 0;
+	res->orig_type_id = res->new_type_id = 0;
+
+	if (core_relo_is_field_based(relo->kind)) {
+		err = bpf_core_calc_field_relo(prog, relo, local_spec,
+					       &res->orig_val, &res->orig_sz,
+					       &res->orig_type_id, &res->validate);
+		err = err ?: bpf_core_calc_field_relo(prog, relo, targ_spec,
+						      &res->new_val, &res->new_sz,
+						      &res->new_type_id, NULL);
+		if (err)
+			goto done;
+		/* Validate if it's safe to adjust load/store memory size.
+		 * Adjustments are performed only if original and new memory
+		 * sizes differ.
+		 */
+		res->fail_memsz_adjust = false;
+		if (res->orig_sz != res->new_sz) {
+			const struct btf_type *orig_t, *new_t;
+
+			orig_t = btf__type_by_id(local_spec->btf, res->orig_type_id);
+			new_t = btf__type_by_id(targ_spec->btf, res->new_type_id);
+
+			/* There are two use cases in which it's safe to
+			 * adjust load/store's mem size:
+			 *   - reading a 32-bit kernel pointer, while on the
+			 *   BPF side pointers are always 64-bit; in this case
+			 *   it's safe to "downsize" instruction size due to
+			 *   pointer being treated as unsigned integer with
+			 *   zero-extended upper 32-bits;
+			 *   - reading unsigned integers, again because
+			 *   zero-extension preserves the value correctly.
+			 *
+			 * In all other cases it's incorrect to attempt to
+			 * load/store the field, because the read value would
+			 * be incorrect, so we poison the relocated instruction.
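+			 *
+			 * (Illustrative example, not taken from any real
+			 * kernel: if the program's local BTF declares a field
+			 * as 'unsigned int cnt' but the target kernel's BTF
+			 * has it as 'unsigned long cnt', a 4-byte BPF_LDX of
+			 * that field is widened to an 8-byte load, since
+			 * zero-extension keeps the value intact; had either
+			 * side been a signed int, the instruction would be
+			 * poisoned instead.)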
+			 */ +			if (btf_is_ptr(orig_t) && btf_is_ptr(new_t)) +				goto done; +			if (btf_is_int(orig_t) && btf_is_int(new_t) && +			    btf_int_encoding(orig_t) != BTF_INT_SIGNED && +			    btf_int_encoding(new_t) != BTF_INT_SIGNED) +				goto done; + +			/* mark as invalid mem size adjustment, but this will +			 * only be checked for LDX/STX/ST insns +			 */ +			res->fail_memsz_adjust = true; +		} +	} else if (core_relo_is_type_based(relo->kind)) { +		err = bpf_core_calc_type_relo(relo, local_spec, &res->orig_val); +		err = err ?: bpf_core_calc_type_relo(relo, targ_spec, &res->new_val); +	} else if (core_relo_is_enumval_based(relo->kind)) { +		err = bpf_core_calc_enumval_relo(relo, local_spec, &res->orig_val); +		err = err ?: bpf_core_calc_enumval_relo(relo, targ_spec, &res->new_val); +	} + +done: +	if (err == -EUCLEAN) { +		/* EUCLEAN is used to signal instruction poisoning request */ +		res->poison = true; +		err = 0; +	} else if (err == -EOPNOTSUPP) { +		/* EOPNOTSUPP means unknown/unsupported relocation */ +		pr_warn("prog '%s': relo #%d: unrecognized CO-RE relocation %s (%d) at insn #%d\n", +			prog->name, relo_idx, core_relo_kind_str(relo->kind), +			relo->kind, relo->insn_off / 8); +	} + +	return err; +} + +/* + * Turn instruction for which CO_RE relocation failed into invalid one with + * distinct signature. + */ +static void bpf_core_poison_insn(struct bpf_program *prog, int relo_idx, +				 int insn_idx, struct bpf_insn *insn) +{ +	pr_debug("prog '%s': relo #%d: substituting insn #%d w/ invalid insn\n", +		 prog->name, relo_idx, insn_idx); +	insn->code = BPF_JMP | BPF_CALL; +	insn->dst_reg = 0; +	insn->src_reg = 0; +	insn->off = 0; +	/* if this instruction is reachable (not a dead code), +	 * verifier will complain with the following message: +	 * invalid func unknown#195896080 +	 */ +	insn->imm = 195896080; /* => 0xbad2310 => "bad relo" */ +} + +static bool is_ldimm64(struct bpf_insn *insn) +{ +	return insn->code == (BPF_LD | BPF_IMM | BPF_DW); +} + +static int insn_bpf_size_to_bytes(struct bpf_insn *insn) +{ +	switch (BPF_SIZE(insn->code)) { +	case BPF_DW: return 8; +	case BPF_W: return 4; +	case BPF_H: return 2; +	case BPF_B: return 1; +	default: return -1; +	} +} + +static int insn_bytes_to_bpf_size(__u32 sz) +{ +	switch (sz) { +	case 8: return BPF_DW; +	case 4: return BPF_W; +	case 2: return BPF_H; +	case 1: return BPF_B; +	default: return -1; +	} +} +  /*   * Patch relocatable BPF instruction.   *   * Patched value is determined by relocation kind and target specification. - * For field existence relocation target spec will be NULL if field is not - * found. + * For existence relocations target spec will be NULL if field/type is not found.   * Expected insn->imm value is determined using relocation kind and local   * spec, and is checked before patching instruction. If actual insn->imm value   * is wrong, bail out with error.   * - * Currently three kinds of BPF instructions are supported: + * Currently supported classes of BPF instruction are:   * 1. rX = <imm> (assignment with immediate operand);   * 2. rX += <imm> (arithmetic operations with immediate operand); + * 3. rX = <imm64> (load with 64-bit immediate value); + * 4. rX = *(T *)(rY + <off>), where T is one of {u8, u16, u32, u64}; + * 5. *(T *)(rX + <off>) = rY, where T is one of {u8, u16, u32, u64}; + * 6. *(T *)(rX + <off>) = <imm>, where T is one of {u8, u16, u32, u64}.   
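+ *
+ * As an illustration (offsets made up for this example, not taken from the
+ * patch), a field byte-offset relocation against a class-4 load
+ *
+ *    rX = *(u32 *)(rY + 16)
+ *
+ * is patched to 'rX = *(u32 *)(rY + 24)' when the target kernel's BTF places
+ * the field at byte offset 24; if the resolved field size differs as well,
+ * the load's size modifier (BPF_W -> BPF_DW, etc.) is also rewritten.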
*/ -static int bpf_core_reloc_insn(struct bpf_program *prog, -			       const struct bpf_field_reloc *relo, +static int bpf_core_patch_insn(struct bpf_program *prog, +			       const struct bpf_core_relo *relo,  			       int relo_idx, -			       const struct bpf_core_spec *local_spec, -			       const struct bpf_core_spec *targ_spec) +			       const struct bpf_core_relo_res *res)  {  	__u32 orig_val, new_val;  	struct bpf_insn *insn; -	bool validate = true; -	int insn_idx, err; +	int insn_idx;  	__u8 class; -	if (relo->insn_off % sizeof(struct bpf_insn)) +	if (relo->insn_off % BPF_INSN_SZ)  		return -EINVAL; -	insn_idx = relo->insn_off / sizeof(struct bpf_insn); +	insn_idx = relo->insn_off / BPF_INSN_SZ; +	/* adjust insn_idx from section frame of reference to the local +	 * program's frame of reference; (sub-)program code is not yet +	 * relocated, so it's enough to just subtract in-section offset +	 */ +	insn_idx = insn_idx - prog->sec_insn_off;  	insn = &prog->insns[insn_idx];  	class = BPF_CLASS(insn->code); -	if (relo->kind == BPF_FIELD_EXISTS) { -		orig_val = 1; /* can't generate EXISTS relo w/o local field */ -		new_val = targ_spec ? 1 : 0; -	} else if (!targ_spec) { -		pr_debug("prog '%s': relo #%d: substituting insn #%d w/ invalid insn\n", -			 bpf_program__title(prog, false), relo_idx, insn_idx); -		insn->code = BPF_JMP | BPF_CALL; -		insn->dst_reg = 0; -		insn->src_reg = 0; -		insn->off = 0; -		/* if this instruction is reachable (not a dead code), -		 * verifier will complain with the following message: -		 * invalid func unknown#195896080 +	if (res->poison) { +poison: +		/* poison second part of ldimm64 to avoid confusing error from +		 * verifier about "unknown opcode 00"  		 */ -		insn->imm = 195896080; /* => 0xbad2310 => "bad relo" */ +		if (is_ldimm64(insn)) +			bpf_core_poison_insn(prog, relo_idx, insn_idx + 1, insn + 1); +		bpf_core_poison_insn(prog, relo_idx, insn_idx, insn);  		return 0; -	} else { -		err = bpf_core_calc_field_relo(prog, relo, local_spec, -					       &orig_val, &validate); -		if (err) -			return err; -		err = bpf_core_calc_field_relo(prog, relo, targ_spec, -					       &new_val, NULL); -		if (err) -			return err;  	} +	orig_val = res->orig_val; +	new_val = res->new_val; +  	switch (class) {  	case BPF_ALU:  	case BPF_ALU64:  		if (BPF_SRC(insn->code) != BPF_K)  			return -EINVAL; -		if (validate && insn->imm != orig_val) { +		if (res->validate && insn->imm != orig_val) {  			pr_warn("prog '%s': relo #%d: unexpected insn #%d (ALU/ALU64) value: got %u, exp %u -> %u\n", -				bpf_program__title(prog, false), relo_idx, +				prog->name, relo_idx,  				insn_idx, insn->imm, orig_val, new_val);  			return -EINVAL;  		}  		orig_val = insn->imm;  		insn->imm = new_val;  		pr_debug("prog '%s': relo #%d: patched insn #%d (ALU/ALU64) imm %u -> %u\n", -			 bpf_program__title(prog, false), relo_idx, insn_idx, +			 prog->name, relo_idx, insn_idx,  			 orig_val, new_val);  		break;  	case BPF_LDX:  	case BPF_ST:  	case BPF_STX: -		if (validate && insn->off != orig_val) { -			pr_warn("prog '%s': relo #%d: unexpected insn #%d (LD/LDX/ST/STX) value: got %u, exp %u -> %u\n", -				bpf_program__title(prog, false), relo_idx, -				insn_idx, insn->off, orig_val, new_val); +		if (res->validate && insn->off != orig_val) { +			pr_warn("prog '%s': relo #%d: unexpected insn #%d (LDX/ST/STX) value: got %u, exp %u -> %u\n", +				prog->name, relo_idx, insn_idx, insn->off, orig_val, new_val);  			return -EINVAL;  		}  		if (new_val > SHRT_MAX) {  			pr_warn("prog '%s': relo #%d: insn 
#%d (LDX/ST/STX) value too big: %u\n", -				bpf_program__title(prog, false), relo_idx, -				insn_idx, new_val); +				prog->name, relo_idx, insn_idx, new_val);  			return -ERANGE;  		} +		if (res->fail_memsz_adjust) { +			pr_warn("prog '%s': relo #%d: insn #%d (LDX/ST/STX) accesses field incorrectly. " +				"Make sure you are accessing pointers, unsigned integers, or fields of matching type and size.\n", +				prog->name, relo_idx, insn_idx); +			goto poison; +		} +  		orig_val = insn->off;  		insn->off = new_val;  		pr_debug("prog '%s': relo #%d: patched insn #%d (LDX/ST/STX) off %u -> %u\n", -			 bpf_program__title(prog, false), relo_idx, insn_idx, -			 orig_val, new_val); +			 prog->name, relo_idx, insn_idx, orig_val, new_val); + +		if (res->new_sz != res->orig_sz) { +			int insn_bytes_sz, insn_bpf_sz; + +			insn_bytes_sz = insn_bpf_size_to_bytes(insn); +			if (insn_bytes_sz != res->orig_sz) { +				pr_warn("prog '%s': relo #%d: insn #%d (LDX/ST/STX) unexpected mem size: got %d, exp %u\n", +					prog->name, relo_idx, insn_idx, insn_bytes_sz, res->orig_sz); +				return -EINVAL; +			} + +			insn_bpf_sz = insn_bytes_to_bpf_size(res->new_sz); +			if (insn_bpf_sz < 0) { +				pr_warn("prog '%s': relo #%d: insn #%d (LDX/ST/STX) invalid new mem size: %u\n", +					prog->name, relo_idx, insn_idx, res->new_sz); +				return -EINVAL; +			} + +			insn->code = BPF_MODE(insn->code) | insn_bpf_sz | BPF_CLASS(insn->code); +			pr_debug("prog '%s': relo #%d: patched insn #%d (LDX/ST/STX) mem_sz %u -> %u\n", +				 prog->name, relo_idx, insn_idx, res->orig_sz, res->new_sz); +		}  		break; +	case BPF_LD: { +		__u64 imm; + +		if (!is_ldimm64(insn) || +		    insn[0].src_reg != 0 || insn[0].off != 0 || +		    insn_idx + 1 >= prog->insns_cnt || +		    insn[1].code != 0 || insn[1].dst_reg != 0 || +		    insn[1].src_reg != 0 || insn[1].off != 0) { +			pr_warn("prog '%s': relo #%d: insn #%d (LDIMM64) has unexpected form\n", +				prog->name, relo_idx, insn_idx); +			return -EINVAL; +		} + +		imm = insn[0].imm + ((__u64)insn[1].imm << 32); +		if (res->validate && imm != orig_val) { +			pr_warn("prog '%s': relo #%d: unexpected insn #%d (LDIMM64) value: got %llu, exp %u -> %u\n", +				prog->name, relo_idx, +				insn_idx, (unsigned long long)imm, +				orig_val, new_val); +			return -EINVAL; +		} + +		insn[0].imm = new_val; +		insn[1].imm = 0; /* currently only 32-bit values are supported */ +		pr_debug("prog '%s': relo #%d: patched insn #%d (LDIMM64) imm64 %llu -> %u\n", +			 prog->name, relo_idx, insn_idx, +			 (unsigned long long)imm, new_val); +		break; +	}  	default: -		pr_warn("prog '%s': relo #%d: trying to relocate unrecognized insn #%d, code:%x, src:%x, dst:%x, off:%x, imm:%x\n", -			bpf_program__title(prog, false), relo_idx, -			insn_idx, insn->code, insn->src_reg, insn->dst_reg, -			insn->off, insn->imm); +		pr_warn("prog '%s': relo #%d: trying to relocate unrecognized insn #%d, code:0x%x, src:0x%x, dst:0x%x, off:0x%x, imm:0x%x\n", +			prog->name, relo_idx, insn_idx, insn->code, +			insn->src_reg, insn->dst_reg, insn->off, insn->imm);  		return -EINVAL;  	} @@ -4728,29 +5548,48 @@ static int bpf_core_reloc_insn(struct bpf_program *prog,  static void bpf_core_dump_spec(int level, const struct bpf_core_spec *spec)  {  	const struct btf_type *t; +	const struct btf_enum *e;  	const char *s;  	__u32 type_id;  	int i; -	type_id = spec->spec[0].type_id; +	type_id = spec->root_type_id;  	t = btf__type_by_id(spec->btf, type_id);  	s = btf__name_by_offset(spec->btf, t->name_off); -	libbpf_print(level, "[%u] %s + ", type_id, 
s); -	for (i = 0; i < spec->raw_len; i++) -		libbpf_print(level, "%d%s", spec->raw_spec[i], -			     i == spec->raw_len - 1 ? " => " : ":"); +	libbpf_print(level, "[%u] %s %s", type_id, btf_kind_str(t), str_is_empty(s) ? "<anon>" : s); -	libbpf_print(level, "%u.%u @ &x", -		     spec->bit_offset / 8, spec->bit_offset % 8); +	if (core_relo_is_type_based(spec->relo_kind)) +		return; -	for (i = 0; i < spec->len; i++) { -		if (spec->spec[i].name) -			libbpf_print(level, ".%s", spec->spec[i].name); -		else -			libbpf_print(level, "[%u]", spec->spec[i].idx); +	if (core_relo_is_enumval_based(spec->relo_kind)) { +		t = skip_mods_and_typedefs(spec->btf, type_id, NULL); +		e = btf_enum(t) + spec->raw_spec[0]; +		s = btf__name_by_offset(spec->btf, e->name_off); + +		libbpf_print(level, "::%s = %u", s, e->val); +		return;  	} +	if (core_relo_is_field_based(spec->relo_kind)) { +		for (i = 0; i < spec->len; i++) { +			if (spec->spec[i].name) +				libbpf_print(level, ".%s", spec->spec[i].name); +			else if (i > 0 || spec->spec[i].idx > 0) +				libbpf_print(level, "[%u]", spec->spec[i].idx); +		} + +		libbpf_print(level, " ("); +		for (i = 0; i < spec->raw_len; i++) +			libbpf_print(level, "%s%d", i == 0 ? "" : ":", spec->raw_spec[i]); + +		if (spec->bit_offset % 8) +			libbpf_print(level, " @ offset %u.%u)", +				     spec->bit_offset / 8, spec->bit_offset % 8); +		else +			libbpf_print(level, " @ offset %u)", spec->bit_offset / 8); +		return; +	}  }  static size_t bpf_core_hash_fn(const void *key, void *ctx) @@ -4814,22 +5653,22 @@ static void *u32_as_hash_key(__u32 x)   *    CPU-wise compared to prebuilding a map from all local type names to   *    a list of candidate type names. It's also sped up by caching resolved   *    list of matching candidates per each local "root" type ID, that has at - *    least one bpf_field_reloc associated with it. This list is shared + *    least one bpf_core_relo associated with it. This list is shared   *    between multiple relocations for the same type ID and is updated as some   *    of the candidates are pruned due to structural incompatibility.   
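+ *
+ *    As a sketch of the cache shape (type names purely illustrative):
+ *
+ *      cand_cache[local type ID of 'struct task_struct'] ->
+ *          list of candidate type IDs in target BTF with a matching name
+ *
+ *    so a later relocation against another field of the same root type
+ *    reuses the (possibly already pruned) candidate list instead of
+ *    re-scanning the whole target BTF.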
*/
-static int bpf_core_reloc_field(struct bpf_program *prog,
-				 const struct bpf_field_reloc *relo,
-				 int relo_idx,
-				 const struct btf *local_btf,
-				 const struct btf *targ_btf,
-				 struct hashmap *cand_cache)
+static int bpf_core_apply_relo(struct bpf_program *prog,
+			       const struct bpf_core_relo *relo,
+			       int relo_idx,
+			       const struct btf *local_btf,
+			       const struct btf *targ_btf,
+			       struct hashmap *cand_cache)
 {
-	const char *prog_name = bpf_program__title(prog, false);
-	struct bpf_core_spec local_spec, cand_spec, targ_spec;
+	struct bpf_core_spec local_spec, cand_spec, targ_spec = {};
 	const void *type_key = u32_as_hash_key(relo->type_id);
-	const struct btf_type *local_type, *cand_type;
-	const char *local_name, *cand_name;
+	struct bpf_core_relo_res cand_res, targ_res;
+	const struct btf_type *local_type;
+	const char *local_name;
 	struct ids_vec *cand_ids;
 	__u32 local_id, cand_id;
 	const char *spec_str;
@@ -4841,32 +5680,49 @@ static int bpf_core_reloc_field(struct bpf_program *prog,
 		return -EINVAL;
 
 	local_name = btf__name_by_offset(local_btf, local_type->name_off);
-	if (str_is_empty(local_name))
+	if (!local_name)
 		return -EINVAL;
 
 	spec_str = btf__name_by_offset(local_btf, relo->access_str_off);
 	if (str_is_empty(spec_str))
 		return -EINVAL;
 
-	err = bpf_core_spec_parse(local_btf, local_id, spec_str, &local_spec);
+	err = bpf_core_parse_spec(local_btf, local_id, spec_str, relo->kind, &local_spec);
 	if (err) {
-		pr_warn("prog '%s': relo #%d: parsing [%d] %s + %s failed: %d\n",
-			prog_name, relo_idx, local_id, local_name, spec_str,
-			err);
+		pr_warn("prog '%s': relo #%d: parsing [%d] %s %s + %s failed: %d\n",
+			prog->name, relo_idx, local_id, btf_kind_str(local_type),
+			str_is_empty(local_name) ? "<anon>" : local_name,
+			spec_str, err);
 		return -EINVAL;
 	}
 
-	pr_debug("prog '%s': relo #%d: kind %d, spec is ", prog_name, relo_idx,
-		 relo->kind);
+	pr_debug("prog '%s': relo #%d: kind <%s> (%d), spec is ", prog->name,
+		 relo_idx, core_relo_kind_str(relo->kind), relo->kind);
 	bpf_core_dump_spec(LIBBPF_DEBUG, &local_spec);
 	libbpf_print(LIBBPF_DEBUG, "\n");
 
+	/* TYPE_ID_LOCAL relo is special and doesn't need candidate search */
+	if (relo->kind == BPF_TYPE_ID_LOCAL) {
+		targ_res.validate = true;
+		targ_res.poison = false;
+		targ_res.orig_val = local_spec.root_type_id;
+		targ_res.new_val = local_spec.root_type_id;
+		goto patch_insn;
+	}
+
+	/* libbpf doesn't support candidate search for anonymous types */
+	if (str_is_empty(local_name)) {
+		pr_warn("prog '%s': relo #%d: <%s> (%d) relocation doesn't support anonymous types\n",
+			prog->name, relo_idx, core_relo_kind_str(relo->kind), relo->kind);
+		return -EOPNOTSUPP;
+	}
+
 	if (!hashmap__find(cand_cache, type_key, (void **)&cand_ids)) {
 		cand_ids = bpf_core_find_cands(local_btf, local_id, targ_btf);
 		if (IS_ERR(cand_ids)) {
-			pr_warn("prog '%s': relo #%d: target candidate search failed for [%d] %s: %ld",
-				prog_name, relo_idx, local_id, local_name,
-				PTR_ERR(cand_ids));
+			pr_warn("prog '%s': relo #%d: target candidate search failed for [%d] %s %s: %ld\n",
+				prog->name, relo_idx, local_id, btf_kind_str(local_type),
+				local_name, PTR_ERR(cand_ids));
 			return PTR_ERR(cand_ids);
 		}
 		err = hashmap__set(cand_cache, type_key, cand_ids, NULL, NULL);
@@ -4878,36 +5734,51 @@ static int bpf_core_reloc_field(struct bpf_program *prog,
 
 	for (i = 0, j = 0; i < cand_ids->len; i++) {
 		cand_id = cand_ids->data[i];
-		cand_type = btf__type_by_id(targ_btf, cand_id);
-		cand_name = btf__name_by_offset(targ_btf, cand_type->name_off);
-
-		err = bpf_core_spec_match(&local_spec, targ_btf,
-					  cand_id, &cand_spec);
-		pr_debug("prog '%s': relo #%d: matching candidate #%d %s against spec ",
-			 prog_name, relo_idx, i, cand_name);
-		bpf_core_dump_spec(LIBBPF_DEBUG, &cand_spec);
-		libbpf_print(LIBBPF_DEBUG, ": %d\n", err);
+		err = bpf_core_spec_match(&local_spec, targ_btf, cand_id, &cand_spec);
 		if (err < 0) {
-			pr_warn("prog '%s': relo #%d: matching error: %d\n",
-				prog_name, relo_idx, err);
+			pr_warn("prog '%s': relo #%d: error matching candidate #%d ",
+				prog->name, relo_idx, i);
+			bpf_core_dump_spec(LIBBPF_WARN, &cand_spec);
+			libbpf_print(LIBBPF_WARN, ": %d\n", err);
 			return err;
 		}
+
+		pr_debug("prog '%s': relo #%d: %s candidate #%d ", prog->name,
+			 relo_idx, err == 0 ?
"non-matching" : "matching", i); +		bpf_core_dump_spec(LIBBPF_DEBUG, &cand_spec); +		libbpf_print(LIBBPF_DEBUG, "\n"); +  		if (err == 0)  			continue; +		err = bpf_core_calc_relo(prog, relo, relo_idx, &local_spec, &cand_spec, &cand_res); +		if (err) +			return err; +  		if (j == 0) { +			targ_res = cand_res;  			targ_spec = cand_spec;  		} else if (cand_spec.bit_offset != targ_spec.bit_offset) { -			/* if there are many candidates, they should all -			 * resolve to the same bit offset +			/* if there are many field relo candidates, they +			 * should all resolve to the same bit offset  			 */ -			pr_warn("prog '%s': relo #%d: offset ambiguity: %u != %u\n", -				prog_name, relo_idx, cand_spec.bit_offset, +			pr_warn("prog '%s': relo #%d: field offset ambiguity: %u != %u\n", +				prog->name, relo_idx, cand_spec.bit_offset,  				targ_spec.bit_offset);  			return -EINVAL; +		} else if (cand_res.poison != targ_res.poison || cand_res.new_val != targ_res.new_val) { +			/* all candidates should result in the same relocation +			 * decision and value, otherwise it's dangerous to +			 * proceed due to ambiguity +			 */ +			pr_warn("prog '%s': relo #%d: relocation decision ambiguity: %s %u != %s %u\n", +				prog->name, relo_idx, +				cand_res.poison ? "failure" : "success", cand_res.new_val, +				targ_res.poison ? "failure" : "success", targ_res.new_val); +			return -EINVAL;  		} -		cand_ids->data[j++] = cand_spec.spec[0].type_id; +		cand_ids->data[j++] = cand_spec.root_type_id;  	}  	/* @@ -4926,22 +5797,28 @@ static int bpf_core_reloc_field(struct bpf_program *prog,  	 * as well as expected case, depending whether instruction w/  	 * relocation is guarded in some way that makes it unreachable (dead  	 * code) if relocation can't be resolved. This is handled in -	 * bpf_core_reloc_insn() uniformly by replacing that instruction with +	 * bpf_core_patch_insn() uniformly by replacing that instruction with  	 * BPF helper call insn (using invalid helper ID). If that instruction  	 * is indeed unreachable, then it will be ignored and eliminated by  	 * verifier. If it was an error, then verifier will complain and point  	 * to a specific instruction number in its log.  	 */ -	if (j == 0) -		pr_debug("prog '%s': relo #%d: no matching targets found for [%d] %s + %s\n", -			 prog_name, relo_idx, local_id, local_name, spec_str); +	if (j == 0) { +		pr_debug("prog '%s': relo #%d: no matching targets found\n", +			 prog->name, relo_idx); -	/* bpf_core_reloc_insn should know how to handle missing targ_spec */ -	err = bpf_core_reloc_insn(prog, relo, relo_idx, &local_spec, -				  j ? 
&targ_spec : NULL);
+		/* calculate single target relo result explicitly */
+		err = bpf_core_calc_relo(prog, relo, relo_idx, &local_spec, NULL, &targ_res);
+		if (err)
+			return err;
+	}
+
+patch_insn:
+	/* bpf_core_patch_insn() should know how to handle missing targ_spec */
+	err = bpf_core_patch_insn(prog, relo, relo_idx, &targ_res);
 	if (err) {
 		pr_warn("prog '%s': relo #%d: failed to patch insn at offset %d: %d\n",
-			prog_name, relo_idx, relo->insn_off, err);
+			prog->name, relo_idx, relo->insn_off, err);
 		return -EINVAL;
 	}
@@ -4949,20 +5826,23 @@ static int bpf_core_reloc_field(struct bpf_program *prog,
 }
 
 static int
-bpf_core_reloc_fields(struct bpf_object *obj, const char *targ_btf_path)
+bpf_object__relocate_core(struct bpf_object *obj, const char *targ_btf_path)
 {
 	const struct btf_ext_info_sec *sec;
-	const struct bpf_field_reloc *rec;
+	const struct bpf_core_relo *rec;
 	const struct btf_ext_info *seg;
 	struct hashmap_entry *entry;
 	struct hashmap *cand_cache = NULL;
 	struct bpf_program *prog;
 	struct btf *targ_btf;
 	const char *sec_name;
-	int i, err = 0;
+	int i, err = 0, insn_idx, sec_idx;
+
+	if (obj->btf_ext->core_relo_info.len == 0)
+		return 0;
 
 	if (targ_btf_path)
-		targ_btf = btf__parse_elf(targ_btf_path, NULL);
+		targ_btf = btf__parse(targ_btf_path, NULL);
 	else
 		targ_btf = obj->btf_vmlinux;
 	if (IS_ERR_OR_NULL(targ_btf)) {
@@ -4976,36 +5856,54 @@ bpf_core_reloc_fields(struct bpf_object *obj, const char *targ_btf_path)
 		goto out;
 	}
 
-	seg = &obj->btf_ext->field_reloc_info;
+	seg = &obj->btf_ext->core_relo_info;
 	for_each_btf_ext_sec(seg, sec) {
 		sec_name = btf__name_by_offset(obj->btf, sec->sec_name_off);
 		if (str_is_empty(sec_name)) {
 			err = -EINVAL;
 			goto out;
 		}
+		/* bpf_object's ELF is gone by now, so it's not easy to find
+		 * a section index by section name; but we can find *any*
+		 * bpf_program within the desired section and use its
+		 * prog->sec_idx to do a proper search by section index and
+		 * instruction offset
+		 */
 		prog = NULL;
 		for (i = 0; i < obj->nr_programs; i++) {
-			if (!strcmp(obj->programs[i].section_name, sec_name)) {
-				prog = &obj->programs[i];
+			prog = &obj->programs[i];
+			if (strcmp(prog->sec_name, sec_name) == 0)
 				break;
-			}
 		}
 		if (!prog) {
-			pr_warn("failed to find program '%s' for CO-RE offset relocation\n",
-				sec_name);
-			err = -EINVAL;
-			goto out;
+			pr_warn("sec '%s': failed to find a BPF program\n", sec_name);
+			return -ENOENT;
		}
+		sec_idx = prog->sec_idx;
 
-		pr_debug("prog '%s': performing %d CO-RE offset relocs\n",
+		pr_debug("sec '%s': found %d CO-RE relocations\n",
 			 sec_name, sec->num_info);
 
 		for_each_btf_ext_rec(seg, sec, i, rec) {
-			err = bpf_core_reloc_field(prog, rec, i, obj->btf,
-						   targ_btf, cand_cache);
+			insn_idx = rec->insn_off / BPF_INSN_SZ;
+			prog = find_prog_by_sec_insn(obj, sec_idx, insn_idx);
+			if (!prog) {
+				pr_warn("sec '%s': failed to find program at insn #%d for CO-RE offset relocation #%d\n",
+					sec_name, insn_idx, i);
+				err = -EINVAL;
+				goto out;
+			}
+			/* no need to apply CO-RE relocation if the program is
+			 * not going to be loaded
+			 */
+			if (!prog->load)
+				continue;
+
+			err = bpf_core_apply_relo(prog, rec, i, obj->btf,
+						  targ_btf, cand_cache);
 			if (err) {
 				pr_warn("prog '%s': relo #%d: failed to relocate: %d\n",
-					sec_name, i, err);
+					prog->name, i, err);
 				goto out;
 			}
 		}
 	}
@@ -5024,125 +5922,432 @@ out:
 	return err;
 }
 
+/* Relocate data
references within program code: + *  - map references; + *  - global variable references; + *  - extern references. + */  static int -bpf_object__relocate_core(struct bpf_object *obj, const char *targ_btf_path) +bpf_object__relocate_data(struct bpf_object *obj, struct bpf_program *prog)  { -	int err = 0; +	int i; -	if (obj->btf_ext->field_reloc_info.len) -		err = bpf_core_reloc_fields(obj, targ_btf_path); +	for (i = 0; i < prog->nr_reloc; i++) { +		struct reloc_desc *relo = &prog->reloc_desc[i]; +		struct bpf_insn *insn = &prog->insns[relo->insn_idx]; +		struct extern_desc *ext; -	return err; +		switch (relo->type) { +		case RELO_LD64: +			insn[0].src_reg = BPF_PSEUDO_MAP_FD; +			insn[0].imm = obj->maps[relo->map_idx].fd; +			relo->processed = true; +			break; +		case RELO_DATA: +			insn[0].src_reg = BPF_PSEUDO_MAP_VALUE; +			insn[1].imm = insn[0].imm + relo->sym_off; +			insn[0].imm = obj->maps[relo->map_idx].fd; +			relo->processed = true; +			break; +		case RELO_EXTERN: +			ext = &obj->externs[relo->sym_off]; +			if (ext->type == EXT_KCFG) { +				insn[0].src_reg = BPF_PSEUDO_MAP_VALUE; +				insn[0].imm = obj->maps[obj->kconfig_map_idx].fd; +				insn[1].imm = ext->kcfg.data_off; +			} else /* EXT_KSYM */ { +				if (ext->ksym.type_id) { /* typed ksyms */ +					insn[0].src_reg = BPF_PSEUDO_BTF_ID; +					insn[0].imm = ext->ksym.vmlinux_btf_id; +				} else { /* typeless ksyms */ +					insn[0].imm = (__u32)ext->ksym.addr; +					insn[1].imm = ext->ksym.addr >> 32; +				} +			} +			relo->processed = true; +			break; +		case RELO_CALL: +			/* will be handled as a follow up pass */ +			break; +		default: +			pr_warn("prog '%s': relo #%d: bad relo type %d\n", +				prog->name, i, relo->type); +			return -EINVAL; +		} +	} + +	return 0;  } -static int -bpf_program__reloc_text(struct bpf_program *prog, struct bpf_object *obj, -			struct reloc_desc *relo) +static int adjust_prog_btf_ext_info(const struct bpf_object *obj, +				    const struct bpf_program *prog, +				    const struct btf_ext_info *ext_info, +				    void **prog_info, __u32 *prog_rec_cnt, +				    __u32 *prog_rec_sz)  { -	struct bpf_insn *insn, *new_insn; -	struct bpf_program *text; -	size_t new_cnt; -	int err; +	void *copy_start = NULL, *copy_end = NULL; +	void *rec, *rec_end, *new_prog_info; +	const struct btf_ext_info_sec *sec; +	size_t old_sz, new_sz; +	const char *sec_name; +	int i, off_adj; -	if (prog->idx != obj->efile.text_shndx && prog->main_prog_cnt == 0) { -		text = bpf_object__find_prog_by_idx(obj, obj->efile.text_shndx); -		if (!text) { -			pr_warn("no .text section found yet relo into text exist\n"); -			return -LIBBPF_ERRNO__RELOC; +	for_each_btf_ext_sec(ext_info, sec) { +		sec_name = btf__name_by_offset(obj->btf, sec->sec_name_off); +		if (!sec_name) +			return -EINVAL; +		if (strcmp(sec_name, prog->sec_name) != 0) +			continue; + +		for_each_btf_ext_rec(ext_info, sec, i, rec) { +			__u32 insn_off = *(__u32 *)rec / BPF_INSN_SZ; + +			if (insn_off < prog->sec_insn_off) +				continue; +			if (insn_off >= prog->sec_insn_off + prog->sec_insn_cnt) +				break; + +			if (!copy_start) +				copy_start = rec; +			copy_end = rec + ext_info->rec_size;  		} -		new_cnt = prog->insns_cnt + text->insns_cnt; -		new_insn = reallocarray(prog->insns, new_cnt, sizeof(*insn)); -		if (!new_insn) { -			pr_warn("oom in prog realloc\n"); + +		if (!copy_start) +			return -ENOENT; + +		/* append func/line info of a given (sub-)program to the main +		 * program func/line info +		 */ +		old_sz = (size_t)(*prog_rec_cnt) * ext_info->rec_size; +		new_sz 
= old_sz + (copy_end - copy_start);
+		new_prog_info = realloc(*prog_info, new_sz);
+		if (!new_prog_info)
 			return -ENOMEM;
-		}
-		prog->insns = new_insn;
+		*prog_info = new_prog_info;
+		*prog_rec_cnt = new_sz / ext_info->rec_size;
+		memcpy(new_prog_info + old_sz, copy_start, copy_end - copy_start);
 
-		if (obj->btf_ext) {
-			err = bpf_program_reloc_btf_ext(prog, obj,
-							text->section_name,
-							prog->insns_cnt);
-			if (err)
-				return err;
-		}
+		/* Kernel instruction offsets are in units of 8-byte
+		 * instructions, while .BTF.ext instruction offsets generated
+		 * by Clang are in units of bytes. So convert Clang offsets
+		 * into kernel offsets and adjust offset according to program
+		 * relocated position.
+		 */
+		off_adj = prog->sub_insn_off - prog->sec_insn_off;
+		rec = new_prog_info + old_sz;
+		rec_end = new_prog_info + new_sz;
+		for (; rec < rec_end; rec += ext_info->rec_size) {
+			__u32 *insn_off = rec;
 
-		memcpy(new_insn + prog->insns_cnt, text->insns,
-		       text->insns_cnt * sizeof(*insn));
-		prog->main_prog_cnt = prog->insns_cnt;
-		prog->insns_cnt = new_cnt;
-		pr_debug("added %zd insn from %s to prog %s\n",
-			 text->insns_cnt, text->section_name,
-			 prog->section_name);
+			*insn_off = *insn_off / BPF_INSN_SZ + off_adj;
+		}
+		*prog_rec_sz = ext_info->rec_size;
+		return 0;
 	}
 
-	insn = &prog->insns[relo->insn_idx];
-	insn->imm += relo->sym_off / 8 + prog->main_prog_cnt - relo->insn_idx;
-	return 0;
+	return -ENOENT;
 }
 
 static int
-bpf_program__relocate(struct bpf_program *prog, struct bpf_object *obj)
+reloc_prog_func_and_line_info(const struct bpf_object *obj,
+			      struct bpf_program *main_prog,
+			      const struct bpf_program *prog)
 {
-	int i, err;
+	int err;
 
-	if (!prog)
+	/* no .BTF.ext relocation if .BTF.ext is missing or kernel doesn't
+	 * support func/line info
+	 */
+	if (!obj->btf_ext || !kernel_supports(FEAT_BTF_FUNC))
 		return 0;
 
-	if (obj->btf_ext) {
-		err = bpf_program_reloc_btf_ext(prog, obj,
-						prog->section_name, 0);
-		if (err)
+	/* only attempt func info relocation if main program's func_info
+	 * relocation was successful
+	 */
+	if (main_prog != prog && !main_prog->func_info)
+		goto line_info;
+
+	err = adjust_prog_btf_ext_info(obj, prog, &obj->btf_ext->func_info,
+				       &main_prog->func_info,
+				       &main_prog->func_info_cnt,
+				       &main_prog->func_info_rec_size);
+	if (err) {
+		if (err != -ENOENT) {
+			pr_warn("prog '%s': error relocating .BTF.ext function info: %d\n",
+				prog->name, err);
+			return err;
+		}
+		if (main_prog->func_info) {
+			/*
+			 * Some info has already been found, but this
+			 * btf_ext reloc failed, so we must error out.
+			 */
+			pr_warn("prog '%s': missing .BTF.ext function info.\n", prog->name);
 			return err;
+		}
+		/* The very first func info failed to load; ignore the rest
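+		 * (libbpf warns once below and the object load proceeds
+		 * without .BTF.ext func info for this program, rather than
+		 * failing outright).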
+		 */
+		pr_warn("prog '%s': missing .BTF.ext function info for the main program, skipping all of .BTF.ext func info.\n",
+			prog->name);
 	}
 
-	if (!prog->reloc_desc)
+line_info:
+	/* don't relocate line info if main program's relocation failed */
+	if (main_prog != prog && !main_prog->line_info)
 		return 0;
 
-	for (i = 0; i < prog->nr_reloc; i++) {
-		struct reloc_desc *relo = &prog->reloc_desc[i];
-		struct bpf_insn *insn = &prog->insns[relo->insn_idx];
-		struct extern_desc *ext;
+	err = adjust_prog_btf_ext_info(obj, prog, &obj->btf_ext->line_info,
+				       &main_prog->line_info,
+				       &main_prog->line_info_cnt,
+				       &main_prog->line_info_rec_size);
+	if (err) {
+		if (err != -ENOENT) {
+			pr_warn("prog '%s': error relocating .BTF.ext line info: %d\n",
+				prog->name, err);
+			return err;
+		}
+		if (main_prog->line_info) {
+			/*
+			 * Some info has already been found, but this
+			 * btf_ext reloc failed, so we must error out.
+			 */
+			pr_warn("prog '%s': missing .BTF.ext line info.\n", prog->name);
+			return err;
+		}
+		/* The very first line info failed to load; ignore the rest. */
+		pr_warn("prog '%s': missing .BTF.ext line info for the main program, skipping all of .BTF.ext line info.\n",
+			prog->name);
+	}
+	return 0;
+}
+
+static int cmp_relo_by_insn_idx(const void *key, const void *elem)
+{
+	size_t insn_idx = *(const size_t *)key;
+	const struct reloc_desc *relo = elem;
 
-		if (relo->insn_idx + 1 >= (int)prog->insns_cnt) {
-			pr_warn("relocation out of range: '%s'\n",
-				prog->section_name);
+	if (insn_idx == relo->insn_idx)
+		return 0;
+	return insn_idx < relo->insn_idx ? -1 : 1;
+}
+
+static struct reloc_desc *find_prog_insn_relo(const struct bpf_program *prog, size_t insn_idx)
+{
+	return bsearch(&insn_idx, prog->reloc_desc, prog->nr_reloc,
+		       sizeof(*prog->reloc_desc), cmp_relo_by_insn_idx);
+}
+
+static int
+bpf_object__reloc_code(struct bpf_object *obj, struct bpf_program *main_prog,
+		       struct bpf_program *prog)
+{
+	size_t sub_insn_idx, insn_idx, new_cnt;
+	struct bpf_program *subprog;
+	struct bpf_insn *insns, *insn;
+	struct reloc_desc *relo;
+	int err;
+
+	err = reloc_prog_func_and_line_info(obj, main_prog, prog);
+	if (err)
+		return err;
+
+	for (insn_idx = 0; insn_idx < prog->sec_insn_cnt; insn_idx++) {
+		insn = &main_prog->insns[prog->sub_insn_off + insn_idx];
+		if (!insn_is_subprog_call(insn))
+			continue;
+
+		relo = find_prog_insn_relo(prog, insn_idx);
+		if (relo && relo->type != RELO_CALL) {
+			pr_warn("prog '%s': unexpected relo for insn #%zu, type %d\n",
+				prog->name, insn_idx, relo->type);
 			return -LIBBPF_ERRNO__RELOC;
 		}
+		if (relo) {
+			/* sub-program instruction index is a combination of
+			 * an offset of a symbol pointed to by relocation and
+			 * call instruction's imm field; for global functions,
+			 * call always has imm = -1, but for static functions
+			 * relocation is against STT_SECTION and insn->imm
+			 * points to the start of the static function
+			 */
+			sub_insn_idx = relo->sym_off / BPF_INSN_SZ + insn->imm + 1;
+		} else {
+			/* if subprogram call is to a static function within
+			 * the same ELF section, there won't be any relocation
+			 * emitted, but it also means there is no additional
+			 * offset necessary, since insn->imm is relative to
+			 * the instruction's original position within the section
+			 */
+			sub_insn_idx = prog->sec_insn_off + insn_idx + insn->imm + 1;
+		}
 
-		switch (relo->type) {
-		case RELO_LD64:
-			insn[0].src_reg = BPF_PSEUDO_MAP_FD;
-			insn[0].imm = obj->maps[relo->map_idx].fd;
-			break;
-		case RELO_DATA:
-			insn[0].src_reg = BPF_PSEUDO_MAP_VALUE;
-			insn[1].imm = insn[0].imm + relo->sym_off;
-			insn[0].imm = obj->maps[relo->map_idx].fd;
-			break;
-		case RELO_EXTERN:
-			ext = &obj->externs[relo->sym_off];
-			if (ext->type == EXT_KCFG) {
-				insn[0].src_reg = BPF_PSEUDO_MAP_VALUE;
-				insn[0].imm = obj->maps[obj->kconfig_map_idx].fd;
-				insn[1].imm = ext->kcfg.data_off;
-			} else /* EXT_KSYM */ {
-				insn[0].imm = (__u32)ext->ksym.addr;
-				insn[1].imm = ext->ksym.addr >> 32;
+		/* we enforce that sub-programs should be in .text section */
+		subprog = find_prog_by_sec_insn(obj, obj->efile.text_shndx, sub_insn_idx);
+		if (!subprog) {
+			pr_warn("prog '%s': no .text section found yet sub-program call exists\n",
+				prog->name);
+			return -LIBBPF_ERRNO__RELOC;
+		}
+
+		/* if it's the first call instruction calling into this
+		 * subprogram (meaning this subprog hasn't been processed
+		 * yet) within the context of current main program:
+		 *   - append it at the end of main program's instructions block;
+		 *   - process it recursively, while current program is put on hold;
+		 *   - if that subprogram calls some other not-yet-processed
+		 *   subprogram, same thing will happen recursively until
+		 *   there are no more unprocessed subprograms left to append
+		 *   and relocate.
+		 */
+		if (subprog->sub_insn_off == 0) {
+			subprog->sub_insn_off = main_prog->insns_cnt;
+
+			new_cnt = main_prog->insns_cnt + subprog->insns_cnt;
+			insns = libbpf_reallocarray(main_prog->insns, new_cnt, sizeof(*insns));
+			if (!insns) {
+				pr_warn("prog '%s': failed to realloc prog code\n", main_prog->name);
+				return -ENOMEM;
 			}
-			break;
-		case RELO_CALL:
-			err = bpf_program__reloc_text(prog, obj, relo);
+			main_prog->insns = insns;
+			main_prog->insns_cnt = new_cnt;
+
+			memcpy(main_prog->insns + subprog->sub_insn_off, subprog->insns,
+			       subprog->insns_cnt * sizeof(*insns));
+
+			pr_debug("prog '%s': added %zu insns from sub-prog '%s'\n",
+				 main_prog->name, subprog->insns_cnt, subprog->name);
+
+			err = bpf_object__reloc_code(obj, main_prog, subprog);
 			if (err)
 				return err;
-			break;
-		default:
-			pr_warn("relo #%d: bad relo type %d\n", i, relo->type);
-			return -EINVAL;
 		}
+
+		/* main_prog->insns memory could have been re-allocated, so
+		 * calculate pointer again
+		 */
+		insn = &main_prog->insns[prog->sub_insn_off + insn_idx];
+		/* calculate correct instruction position within current main
+		 * prog; each main prog can have a different set of
+		 * subprograms appended (potentially in different order as
+		 * well), so position of any subprog can be different for
+		 * different main programs */
+		insn->imm = subprog->sub_insn_off - (prog->sub_insn_off + insn_idx) - 1;
+
+		if (relo)
+			relo->processed = true;
+
+		pr_debug("prog '%s': insn #%zu relocated, imm %d points to subprog '%s' (now at %zu offset)\n",
+			 prog->name, insn_idx, insn->imm, subprog->name, subprog->sub_insn_off);
 	}
 
-	zfree(&prog->reloc_desc);
-	prog->nr_reloc = 0;
+	return 0;
+}
+
+/*
+ * Relocate sub-program calls.
+ *
+ * Algorithm operates as follows. Each entry-point BPF program (referred to as
+ * main prog) is processed separately. Each subprog (a non-entry function,
+ * which can be called from either entry progs or other subprogs) gets its
+ * sub_insn_off reset to zero.
+ * This serves as an indicator that this subprogram hasn't yet been appended
+ * and relocated within the current main prog. Once it's relocated,
+ * sub_insn_off will point at the position within the current main prog where
+ * the given subprog was appended. This will further be used to relocate all
+ * the call instructions jumping into this subprog.
+ *
+ * We start with main program and process all call instructions. If the call
+ * is into a subprog that hasn't been processed (i.e., subprog->sub_insn_off
+ * is zero), subprog instructions are appended at the end of main program's
+ * instruction array. Then main program is "put on hold" while we recursively
+ * process newly appended subprogram. If that subprogram calls into another
+ * subprogram that hasn't been appended, new subprogram is appended again to
+ * the *main* prog's instructions (subprog's instructions are always left
+ * untouched, as they need to be in unmodified state for subsequent main progs
+ * and subprog instructions are always used only as part of a main prog) and
+ * the process continues recursively. Once all the subprogs called from a main
+ * prog or any of its subprogs are appended (and relocated), all their
+ * positions within finalized instructions array are known, so it's easy to
+ * rewrite call instructions with correct relative offsets, corresponding to
+ * desired target subprog.
+ *
+ * It's important to realize that some subprogs might not be called from some
+ * main prog and any of its called/used subprogs. Those will keep their
+ * subprog->sub_insn_off as zero at all times and won't be appended to current
+ * main prog and won't be relocated within the context of current main prog.
+ * They might still be used from other main progs later.
+ *
+ * Visually this process can be shown as below. Suppose we have two main
+ * programs mainA and mainB and BPF object contains three subprogs: subA,
+ * subB, and subC. mainA calls only subA, mainB calls only subC, but subA and
+ * subC both call subB:
+ *
+ *        +--------+ +-------+
+ *        |        v v       |
+ *     +--+---+ +--+-+-+ +---+--+
+ *     | subA | | subB | | subC |
+ *     +--+---+ +------+ +---+--+
+ *        ^                  ^
+ *        |                  |
+ *    +---+-------+   +------+----+
+ *    |   mainA   |   |   mainB   |
+ *    +-----------+   +-----------+
+ *
+ * We'll start relocating mainA, will find subA, append it and start
+ * processing subA recursively:
+ *
+ *    +-----------+------+
+ *    |   mainA   | subA |
+ *    +-----------+------+
+ *
+ * At this point we notice that subB is used from subA, so we append it and
+ * relocate (there are no further subcalls from subB):
+ *
+ *    +-----------+------+------+
+ *    |   mainA   | subA | subB |
+ *    +-----------+------+------+
+ *
+ * At this point, we relocate subA calls, then go one level up and finish with
+ * relocating mainA calls.
+ *
+ * For mainB the process is similar but results in a different order. We start
+ * with mainB and skip subA and subB, as mainB never calls them (at least
+ * directly), but we see subC is needed, so we append and start processing it:
+ *
+ *    +-----------+------+
+ *    |   mainB   | subC |
+ *    +-----------+------+
+ *
+ * Now we see subC needs subB, so we go back to it, append and relocate it:
+ *
+ *    +-----------+------+------+
+ *    |   mainB   | subC | subB |
+ *    +-----------+------+------+
+ *
+ * At this point we unwind recursion, relocate calls in subC, then in mainB.
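+ *
+ * (Worked example with made-up sizes: if mainA is 20 insns long and subA
+ * was appended at sub_insn_off = 20, a call to subA at insn #7 of mainA is
+ * rewritten as imm = 20 - (0 + 7) - 1 = 12, since the kernel resolves a
+ * call target as the instruction right after the call plus imm.)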
+ */ +static int +bpf_object__relocate_calls(struct bpf_object *obj, struct bpf_program *prog) +{ +	struct bpf_program *subprog; +	int i, j, err; + +	/* mark all subprogs as not relocated (yet) within the context of +	 * current main program +	 */ +	for (i = 0; i < obj->nr_programs; i++) { +		subprog = &obj->programs[i]; +		if (!prog_is_subprog(obj, subprog)) +			continue; + +		subprog->sub_insn_off = 0; +		for (j = 0; j < subprog->nr_reloc; j++) +			if (subprog->reloc_desc[j].type == RELO_CALL) +				subprog->reloc_desc[j].processed = false; +	} + +	err = bpf_object__reloc_code(obj, prog, prog); +	if (err) +		return err; + +  	return 0;  } @@ -5161,35 +6366,45 @@ bpf_object__relocate(struct bpf_object *obj, const char *targ_btf_path)  			return err;  		}  	} -	/* ensure .text is relocated first, as it's going to be copied as-is -	 * later for sub-program calls +	/* relocate data references first for all programs and sub-programs, +	 * as they don't change relative to code locations, so subsequent +	 * subprogram processing won't need to re-calculate any of them  	 */  	for (i = 0; i < obj->nr_programs; i++) {  		prog = &obj->programs[i]; -		if (prog->idx != obj->efile.text_shndx) -			continue; - -		err = bpf_program__relocate(prog, obj); +		err = bpf_object__relocate_data(obj, prog);  		if (err) { -			pr_warn("failed to relocate '%s'\n", prog->section_name); +			pr_warn("prog '%s': failed to relocate data references: %d\n", +				prog->name, err);  			return err;  		} -		break;  	} -	/* now relocate everything but .text, which by now is relocated -	 * properly, so we can copy raw sub-program instructions as is safely +	/* now relocate subprogram calls and append used subprograms to main +	 * programs; each copy of subprogram code needs to be relocated +	 * differently for each main program, because its code location might +	 * have changed  	 */  	for (i = 0; i < obj->nr_programs; i++) {  		prog = &obj->programs[i]; -		if (prog->idx == obj->efile.text_shndx) +		/* sub-program's sub-calls are relocated within the context of +		 * its main program only +		 */ +		if (prog_is_subprog(obj, prog))  			continue; -		err = bpf_program__relocate(prog, obj); +		err = bpf_object__relocate_calls(obj, prog);  		if (err) { -			pr_warn("failed to relocate '%s'\n", prog->section_name); +			pr_warn("prog '%s': failed to relocate calls: %d\n", +				prog->name, err);  			return err;  		}  	} +	/* free up relocation descriptors */ +	for (i = 0; i < obj->nr_programs; i++) { +		prog = &obj->programs[i]; +		zfree(&prog->reloc_desc); +		prog->nr_reloc = 0; +	}  	return 0;  } @@ -5230,8 +6445,7 @@ static int bpf_object__collect_map_relos(struct bpf_object *obj,  				i, (size_t)GELF_R_SYM(rel.r_info));  			return -LIBBPF_ERRNO__FORMAT;  		} -		name = elf_strptr(obj->efile.elf, obj->efile.strtabidx, -				  sym.st_name) ? 
: "<?>"; +		name = elf_sym_str(obj, sym.st_name) ?: "<?>";  		if (sym.st_shndx != obj->efile.btf_maps_shndx) {  			pr_warn(".maps relo #%d: '%s' isn't a BTF-defined map\n",  				i, name); @@ -5293,7 +6507,7 @@ static int bpf_object__collect_map_relos(struct bpf_object *obj,  		moff /= bpf_ptr_sz;  		if (moff >= map->init_slots_sz) {  			new_sz = moff + 1; -			tmp = realloc(map->init_slots, new_sz * host_ptr_sz); +			tmp = libbpf_reallocarray(map->init_slots, new_sz, host_ptr_sz);  			if (!tmp)  				return -ENOMEM;  			map->init_slots = tmp; @@ -5310,41 +6524,98 @@ static int bpf_object__collect_map_relos(struct bpf_object *obj,  	return 0;  } -static int bpf_object__collect_reloc(struct bpf_object *obj) +static int cmp_relocs(const void *_a, const void *_b)  { -	int i, err; +	const struct reloc_desc *a = _a; +	const struct reloc_desc *b = _b; -	if (!obj_elf_valid(obj)) { -		pr_warn("Internal error: elf object is closed\n"); -		return -LIBBPF_ERRNO__INTERNAL; -	} +	if (a->insn_idx != b->insn_idx) +		return a->insn_idx < b->insn_idx ? -1 : 1; + +	/* no two relocations should have the same insn_idx, but ... */ +	if (a->type != b->type) +		return a->type < b->type ? -1 : 1; + +	return 0; +} + +static int bpf_object__collect_relos(struct bpf_object *obj) +{ +	int i, err;  	for (i = 0; i < obj->efile.nr_reloc_sects; i++) {  		GElf_Shdr *shdr = &obj->efile.reloc_sects[i].shdr;  		Elf_Data *data = obj->efile.reloc_sects[i].data;  		int idx = shdr->sh_info; -		struct bpf_program *prog;  		if (shdr->sh_type != SHT_REL) {  			pr_warn("internal error at %d\n", __LINE__);  			return -LIBBPF_ERRNO__INTERNAL;  		} -		if (idx == obj->efile.st_ops_shndx) { +		if (idx == obj->efile.st_ops_shndx)  			err = bpf_object__collect_st_ops_relos(obj, shdr, data); -		} else if (idx == obj->efile.btf_maps_shndx) { +		else if (idx == obj->efile.btf_maps_shndx)  			err = bpf_object__collect_map_relos(obj, shdr, data); -		} else { -			prog = bpf_object__find_prog_by_idx(obj, idx); -			if (!prog) { -				pr_warn("relocation failed: no prog in section(%d)\n", idx); -				return -LIBBPF_ERRNO__RELOC; -			} -			err = bpf_program__collect_reloc(prog, shdr, data, obj); -		} +		else +			err = bpf_object__collect_prog_relos(obj, shdr, data);  		if (err)  			return err;  	} + +	for (i = 0; i < obj->nr_programs; i++) { +		struct bpf_program *p = &obj->programs[i]; +		 +		if (!p->nr_reloc) +			continue; + +		qsort(p->reloc_desc, p->nr_reloc, sizeof(*p->reloc_desc), cmp_relocs); +	} +	return 0; +} + +static bool insn_is_helper_call(struct bpf_insn *insn, enum bpf_func_id *func_id) +{ +	if (BPF_CLASS(insn->code) == BPF_JMP && +	    BPF_OP(insn->code) == BPF_CALL && +	    BPF_SRC(insn->code) == BPF_K && +	    insn->src_reg == 0 && +	    insn->dst_reg == 0) { +		    *func_id = insn->imm; +		    return true; +	} +	return false; +} + +static int bpf_object__sanitize_prog(struct bpf_object* obj, struct bpf_program *prog) +{ +	struct bpf_insn *insn = prog->insns; +	enum bpf_func_id func_id; +	int i; + +	for (i = 0; i < prog->insns_cnt; i++, insn++) { +		if (!insn_is_helper_call(insn, &func_id)) +			continue; + +		/* on kernels that don't yet support +		 * bpf_probe_read_{kernel,user}[_str] helpers, fall back +		 * to bpf_probe_read() which works well for old kernels +		 */ +		switch (func_id) { +		case BPF_FUNC_probe_read_kernel: +		case BPF_FUNC_probe_read_user: +			if (!kernel_supports(FEAT_PROBE_READ_KERN)) +				insn->imm = BPF_FUNC_probe_read; +			break; +		case BPF_FUNC_probe_read_kernel_str: +		case BPF_FUNC_probe_read_user_str: +			
if (!kernel_supports(FEAT_PROBE_READ_KERN)) +				insn->imm = BPF_FUNC_probe_read_str; +			break; +		default: +			break; +		} +	}  	return 0;  } @@ -5364,12 +6635,12 @@ load_program(struct bpf_program *prog, struct bpf_insn *insns, int insns_cnt,  	memset(&load_attr, 0, sizeof(struct bpf_load_program_attr));  	load_attr.prog_type = prog->type;  	/* old kernels might not support specifying expected_attach_type */ -	if (!prog->caps->exp_attach_type && prog->sec_def && +	if (!kernel_supports(FEAT_EXP_ATTACH_TYPE) && prog->sec_def &&  	    prog->sec_def->is_exp_attach_type_optional)  		load_attr.expected_attach_type = 0;  	else  		load_attr.expected_attach_type = prog->expected_attach_type; -	if (prog->caps->name) +	if (kernel_supports(FEAT_PROG_NAME))  		load_attr.name = prog->name;  	load_attr.insns = insns;  	load_attr.insns_cnt = insns_cnt; @@ -5387,7 +6658,7 @@ load_program(struct bpf_program *prog, struct bpf_insn *insns, int insns_cnt,  	}  	/* specify func_info/line_info only if kernel supports them */  	btf_fd = bpf_object__btf_fd(prog->obj); -	if (btf_fd >= 0 && prog->obj->caps.btf_func) { +	if (btf_fd >= 0 && kernel_supports(FEAT_BTF_FUNC)) {  		load_attr.prog_btf_fd = btf_fd;  		load_attr.func_info = prog->func_info;  		load_attr.func_info_rec_size = prog->func_info_rec_size; @@ -5413,6 +6684,20 @@ retry_load:  	if (ret >= 0) {  		if (log_buf && load_attr.log_level)  			pr_debug("verifier log:\n%s", log_buf); + +		if (prog->obj->rodata_map_idx >= 0 && +		    kernel_supports(FEAT_PROG_BIND_MAP)) { +			struct bpf_map *rodata_map = +				&prog->obj->maps[prog->obj->rodata_map_idx]; + +			if (bpf_prog_bind_map(ret, bpf_map__fd(rodata_map), NULL)) { +				cp = libbpf_strerror_r(errno, errmsg, sizeof(errmsg)); +				pr_warn("prog '%s': failed to bind .rodata map: %s\n", +					prog->name, cp); +				/* Don't fail hard if can't bind rodata. */ +			} +		} +  		*pfd = ret;  		ret = 0;  		goto out; @@ -5425,7 +6710,7 @@ retry_load:  		free(log_buf);  		goto retry_load;  	} -	ret = -errno; +	ret = errno ? 
-errno : -LIBBPF_ERRNO__LOAD;  	cp = libbpf_strerror_r(errno, errmsg, sizeof(errmsg));  	pr_warn("load bpf program failed: %s\n", cp);  	pr_perm_msg(ret); @@ -5465,8 +6750,7 @@ int bpf_program__load(struct bpf_program *prog, char *license, __u32 kern_ver)  	int err = 0, fd, i, btf_id;  	if (prog->obj->loaded) { -		pr_warn("prog '%s'('%s'): can't load after object was loaded\n", -			prog->name, prog->section_name); +		pr_warn("prog '%s': can't load after object was loaded\n", prog->name);  		return -EINVAL;  	} @@ -5482,7 +6766,7 @@ int bpf_program__load(struct bpf_program *prog, char *license, __u32 kern_ver)  	if (prog->instances.nr < 0 || !prog->instances.fds) {  		if (prog->preprocessor) {  			pr_warn("Internal error: can't load program '%s'\n", -				prog->section_name); +				prog->name);  			return -LIBBPF_ERRNO__INTERNAL;  		} @@ -5497,8 +6781,8 @@ int bpf_program__load(struct bpf_program *prog, char *license, __u32 kern_ver)  	if (!prog->preprocessor) {  		if (prog->instances.nr != 1) { -			pr_warn("Program '%s' is inconsistent: nr(%d) != 1\n", -				prog->section_name, prog->instances.nr); +			pr_warn("prog '%s': inconsistent nr(%d) != 1\n", +				prog->name, prog->instances.nr);  		}  		err = load_program(prog, prog->insns, prog->insns_cnt,  				   license, kern_ver, &fd); @@ -5516,13 +6800,13 @@ int bpf_program__load(struct bpf_program *prog, char *license, __u32 kern_ver)  				   prog->insns_cnt, &result);  		if (err) {  			pr_warn("Preprocessing the %dth instance of program '%s' failed\n", -				i, prog->section_name); +				i, prog->name);  			goto out;  		}  		if (!result.new_insn_ptr || !result.new_insn_cnt) {  			pr_debug("Skip loading the %dth instance of program '%s'\n", -				 i, prog->section_name); +				 i, prog->name);  			prog->instances.fds[i] = -1;  			if (result.pfd)  				*result.pfd = -1; @@ -5533,7 +6817,7 @@ int bpf_program__load(struct bpf_program *prog, char *license, __u32 kern_ver)  				   result.new_insn_cnt, license, kern_ver, &fd);  		if (err) {  			pr_warn("Loading the %dth instance of program '%s' failed\n", -				i, prog->section_name); +				i, prog->name);  			goto out;  		} @@ -5543,18 +6827,12 @@ int bpf_program__load(struct bpf_program *prog, char *license, __u32 kern_ver)  	}  out:  	if (err) -		pr_warn("failed to load program '%s'\n", prog->section_name); +		pr_warn("failed to load program '%s'\n", prog->name);  	zfree(&prog->insns);  	prog->insns_cnt = 0;  	return err;  } -static bool bpf_program__is_function_storage(const struct bpf_program *prog, -					     const struct bpf_object *obj) -{ -	return prog->idx == obj->efile.text_shndx && obj->has_pseudo_calls; -} -  static int  bpf_object__load_progs(struct bpf_object *obj, int log_level)  { @@ -5564,11 +6842,17 @@ bpf_object__load_progs(struct bpf_object *obj, int log_level)  	for (i = 0; i < obj->nr_programs; i++) {  		prog = &obj->programs[i]; -		if (bpf_program__is_function_storage(prog, obj)) +		err = bpf_object__sanitize_prog(obj, prog); +		if (err) +			return err; +	} + +	for (i = 0; i < obj->nr_programs; i++) { +		prog = &obj->programs[i]; +		if (prog_is_subprog(obj, prog))  			continue;  		if (!prog->load) { -			pr_debug("prog '%s'('%s'): skipped loading\n", -				 prog->name, prog->section_name); +			pr_debug("prog '%s': skipped loading\n", prog->name);  			continue;  		}  		prog->log_level |= log_level; @@ -5629,18 +6913,19 @@ __bpf_object__open(const char *path, const void *obj_buf, size_t obj_buf_sz,  	err = err ? : bpf_object__collect_externs(obj);  	err = err ? 
: bpf_object__finalize_btf(obj);  	err = err ? : bpf_object__init_maps(obj, opts); -	err = err ? : bpf_object__init_prog_names(obj); -	err = err ? : bpf_object__collect_reloc(obj); +	err = err ? : bpf_object__collect_relos(obj);  	if (err)  		goto out;  	bpf_object__elf_finish(obj);  	bpf_object__for_each_program(prog, obj) { -		prog->sec_def = find_sec_def(prog->section_name); +		prog->sec_def = find_sec_def(prog->sec_name);  		if (!prog->sec_def)  			/* couldn't guess, but user might manually specify */  			continue; +		if (prog->sec_def->is_sleepable) +			prog->prog_flags |= BPF_F_SLEEPABLE;  		bpf_program__set_type(prog, prog->sec_def->prog_type);  		bpf_program__set_expected_attach_type(prog,  				prog->sec_def->expected_attach_type); @@ -5750,11 +7035,11 @@ static int bpf_object__sanitize_maps(struct bpf_object *obj)  	bpf_object__for_each_map(m, obj) {  		if (!bpf_map__is_internal(m))  			continue; -		if (!obj->caps.global_data) { +		if (!kernel_supports(FEAT_GLOBAL_DATA)) {  			pr_warn("kernel doesn't support global data\n");  			return -ENOTSUP;  		} -		if (!obj->caps.array_mmap) +		if (!kernel_supports(FEAT_ARRAY_MMAP))  			m->def.map_flags ^= BPF_F_MMAPABLE;  	} @@ -5809,10 +7094,72 @@ out:  	return err;  } +static int bpf_object__resolve_ksyms_btf_id(struct bpf_object *obj) +{ +	struct extern_desc *ext; +	int i, id; + +	for (i = 0; i < obj->nr_extern; i++) { +		const struct btf_type *targ_var, *targ_type; +		__u32 targ_type_id, local_type_id; +		const char *targ_var_name; +		int ret; + +		ext = &obj->externs[i]; +		if (ext->type != EXT_KSYM || !ext->ksym.type_id) +			continue; + +		id = btf__find_by_name_kind(obj->btf_vmlinux, ext->name, +					    BTF_KIND_VAR); +		if (id <= 0) { +			pr_warn("extern (ksym) '%s': failed to find BTF ID in vmlinux BTF.\n", +				ext->name); +			return -ESRCH; +		} + +		/* find local type_id */ +		local_type_id = ext->ksym.type_id; + +		/* find target type_id */ +		targ_var = btf__type_by_id(obj->btf_vmlinux, id); +		targ_var_name = btf__name_by_offset(obj->btf_vmlinux, +						    targ_var->name_off); +		targ_type = skip_mods_and_typedefs(obj->btf_vmlinux, +						   targ_var->type, +						   &targ_type_id); + +		ret = bpf_core_types_are_compat(obj->btf, local_type_id, +						obj->btf_vmlinux, targ_type_id); +		if (ret <= 0) { +			const struct btf_type *local_type; +			const char *targ_name, *local_name; + +			local_type = btf__type_by_id(obj->btf, local_type_id); +			local_name = btf__name_by_offset(obj->btf, +							 local_type->name_off); +			targ_name = btf__name_by_offset(obj->btf_vmlinux, +							targ_type->name_off); + +			pr_warn("extern (ksym) '%s': incompatible types, expected [%d] %s %s, but kernel has [%d] %s %s\n", +				ext->name, local_type_id, +				btf_kind_str(local_type), local_name, targ_type_id, +				btf_kind_str(targ_type), targ_name); +			return -EINVAL; +		} + +		ext->is_set = true; +		ext->ksym.vmlinux_btf_id = id; +		pr_debug("extern (ksym) '%s': resolved to [%d] %s %s\n", +			 ext->name, id, btf_kind_str(targ_var), targ_var_name); +	} +	return 0; +} +  static int bpf_object__resolve_externs(struct bpf_object *obj,  				       const char *extra_kconfig)  {  	bool need_config = false, need_kallsyms = false; +	bool need_vmlinux_btf = false;  	struct extern_desc *ext;  	void *kcfg_data = NULL;  	int err, i; @@ -5843,7 +7190,10 @@ static int bpf_object__resolve_externs(struct bpf_object *obj,  			   strncmp(ext->name, "CONFIG_", 7) == 0) {  			need_config = true;  		} else if (ext->type == EXT_KSYM) { -			need_kallsyms = true; +			
if (ext->ksym.type_id) +				need_vmlinux_btf = true; +			else +				need_kallsyms = true;  		} else {  			pr_warn("unrecognized extern '%s'\n", ext->name);  			return -EINVAL; @@ -5872,6 +7222,11 @@ static int bpf_object__resolve_externs(struct bpf_object *obj,  		if (err)  			return -EINVAL;  	} +	if (need_vmlinux_btf) { +		err = bpf_object__resolve_ksyms_btf_id(obj); +		if (err) +			return -EINVAL; +	}  	for (i = 0; i < obj->nr_extern; i++) {  		ext = &obj->externs[i]; @@ -5904,11 +7259,10 @@ int bpf_object__load_xattr(struct bpf_object_load_attr *attr)  	}  	err = bpf_object__probe_loading(obj); -	err = err ? : bpf_object__probe_caps(obj); +	err = err ? : bpf_object__load_vmlinux_btf(obj);  	err = err ? : bpf_object__resolve_externs(obj, obj->kconfig);  	err = err ? : bpf_object__sanitize_and_load_btf(obj);  	err = err ? : bpf_object__sanitize_maps(obj); -	err = err ? : bpf_object__load_vmlinux_btf(obj);  	err = err ? : bpf_object__init_kern_struct_ops_maps(obj);  	err = err ? : bpf_object__create_maps(obj);  	err = err ? : bpf_object__relocate(obj, attr->target_btf_path); @@ -6016,7 +7370,7 @@ int bpf_program__pin_instance(struct bpf_program *prog, const char *path,  	if (instance < 0 || instance >= prog->instances.nr) {  		pr_warn("invalid prog instance %d of prog %s (max %d)\n", -			instance, prog->section_name, prog->instances.nr); +			instance, prog->name, prog->instances.nr);  		return -EINVAL;  	} @@ -6047,7 +7401,7 @@ int bpf_program__unpin_instance(struct bpf_program *prog, const char *path,  	if (instance < 0 || instance >= prog->instances.nr) {  		pr_warn("invalid prog instance %d of prog %s (max %d)\n", -			instance, prog->section_name, prog->instances.nr); +			instance, prog->name, prog->instances.nr);  		return -EINVAL;  	} @@ -6077,8 +7431,7 @@ int bpf_program__pin(struct bpf_program *prog, const char *path)  	}  	if (prog->instances.nr <= 0) { -		pr_warn("no instances of prog %s to pin\n", -			   prog->section_name); +		pr_warn("no instances of prog %s to pin\n", prog->name);  		return -EINVAL;  	} @@ -6140,8 +7493,7 @@ int bpf_program__unpin(struct bpf_program *prog, const char *path)  	}  	if (prog->instances.nr <= 0) { -		pr_warn("no instances of prog %s to pin\n", -			   prog->section_name); +		pr_warn("no instances of prog %s to pin\n", prog->name);  		return -EINVAL;  	} @@ -6633,7 +7985,7 @@ bpf_program__next(struct bpf_program *prev, const struct bpf_object *obj)  	do {  		prog = __bpf_program__iter(prog, obj, true); -	} while (prog && bpf_program__is_function_storage(prog, obj)); +	} while (prog && prog_is_subprog(obj, prog));  	return prog;  } @@ -6645,7 +7997,7 @@ bpf_program__prev(struct bpf_program *next, const struct bpf_object *obj)  	do {  		prog = __bpf_program__iter(prog, obj, false); -	} while (prog && bpf_program__is_function_storage(prog, obj)); +	} while (prog && prog_is_subprog(obj, prog));  	return prog;  } @@ -6676,11 +8028,16 @@ const char *bpf_program__name(const struct bpf_program *prog)  	return prog->name;  } +const char *bpf_program__section_name(const struct bpf_program *prog) +{ +	return prog->sec_name; +} +  const char *bpf_program__title(const struct bpf_program *prog, bool needs_copy)  {  	const char *title; -	title = prog->section_name; +	title = prog->sec_name;  	if (needs_copy) {  		title = strdup(title);  		if (!title) { @@ -6713,7 +8070,7 @@ int bpf_program__fd(const struct bpf_program *prog)  size_t bpf_program__size(const struct bpf_program *prog)  { -	return prog->insns_cnt * sizeof(struct bpf_insn); +	return prog->insns_cnt * 
BPF_INSN_SZ;  }  int bpf_program__set_prep(struct bpf_program *prog, int nr_instances, @@ -6753,14 +8110,14 @@ int bpf_program__nth_fd(const struct bpf_program *prog, int n)  	if (n >= prog->instances.nr || n < 0) {  		pr_warn("Can't get the %dth fd from program %s: only %d instances\n", -			n, prog->section_name, prog->instances.nr); +			n, prog->name, prog->instances.nr);  		return -EINVAL;  	}  	fd = prog->instances.fds[n];  	if (fd < 0) {  		pr_warn("%dth instance of program '%s' is invalid\n", -			n, prog->section_name); +			n, prog->name);  		return -ENOENT;  	} @@ -6910,6 +8267,21 @@ static const struct bpf_sec_def section_defs[] = {  		.expected_attach_type = BPF_TRACE_FEXIT,  		.is_attach_btf = true,  		.attach_fn = attach_trace), +	SEC_DEF("fentry.s/", TRACING, +		.expected_attach_type = BPF_TRACE_FENTRY, +		.is_attach_btf = true, +		.is_sleepable = true, +		.attach_fn = attach_trace), +	SEC_DEF("fmod_ret.s/", TRACING, +		.expected_attach_type = BPF_MODIFY_RETURN, +		.is_attach_btf = true, +		.is_sleepable = true, +		.attach_fn = attach_trace), +	SEC_DEF("fexit.s/", TRACING, +		.expected_attach_type = BPF_TRACE_FEXIT, +		.is_attach_btf = true, +		.is_sleepable = true, +		.attach_fn = attach_trace),  	SEC_DEF("freplace/", EXT,  		.is_attach_btf = true,  		.attach_fn = attach_trace), @@ -6917,6 +8289,11 @@ static const struct bpf_sec_def section_defs[] = {  		.is_attach_btf = true,  		.expected_attach_type = BPF_LSM_MAC,  		.attach_fn = attach_lsm), +	SEC_DEF("lsm.s/", LSM, +		.is_attach_btf = true, +		.is_sleepable = true, +		.expected_attach_type = BPF_LSM_MAC, +		.attach_fn = attach_lsm),  	SEC_DEF("iter/", TRACING,  		.expected_attach_type = BPF_TRACE_ITER,  		.is_attach_btf = true, @@ -7100,7 +8477,7 @@ static int bpf_object__collect_st_ops_relos(struct bpf_object *obj,  	const struct btf *btf;  	struct bpf_map *map;  	Elf_Data *symbols; -	unsigned int moff; +	unsigned int moff, insn_idx;  	const char *name;  	__u32 member_idx;  	GElf_Sym sym; @@ -7122,8 +8499,7 @@ static int bpf_object__collect_st_ops_relos(struct bpf_object *obj,  			return -LIBBPF_ERRNO__FORMAT;  		} -		name = elf_strptr(obj->efile.elf, obj->efile.strtabidx, -				  sym.st_name) ? 
: "<?>"; +		name = elf_sym_str(obj, sym.st_name) ?: "<?>";  		map = find_struct_ops_map_by_offset(obj, rel.r_offset);  		if (!map) {  			pr_warn("struct_ops reloc: cannot find map at rel.r_offset %zu\n", @@ -7146,6 +8522,12 @@ static int bpf_object__collect_st_ops_relos(struct bpf_object *obj,  				map->name, (size_t)rel.r_offset, shdr_idx);  			return -LIBBPF_ERRNO__RELOC;  		} +		if (sym.st_value % BPF_INSN_SZ) { +			pr_warn("struct_ops reloc %s: invalid target program offset %llu\n", +				map->name, (unsigned long long)sym.st_value); +			return -LIBBPF_ERRNO__FORMAT; +		} +		insn_idx = sym.st_value / BPF_INSN_SZ;  		member = find_member_by_offset(st_ops->type, moff * 8);  		if (!member) { @@ -7162,7 +8544,7 @@ static int bpf_object__collect_st_ops_relos(struct bpf_object *obj,  			return -EINVAL;  		} -		prog = bpf_object__find_prog_by_idx(obj, shdr_idx); +		prog = find_prog_by_sec_insn(obj, shdr_idx, insn_idx);  		if (!prog) {  			pr_warn("struct_ops reloc %s: cannot find prog at shdr_idx %u to relocate func ptr %s\n",  				map->name, shdr_idx, name); @@ -7172,7 +8554,7 @@ static int bpf_object__collect_st_ops_relos(struct bpf_object *obj,  		if (prog->type == BPF_PROG_TYPE_UNSPEC) {  			const struct bpf_sec_def *sec_def; -			sec_def = find_sec_def(prog->section_name); +			sec_def = find_sec_def(prog->sec_name);  			if (sec_def &&  			    sec_def->prog_type != BPF_PROG_TYPE_STRUCT_OPS) {  				/* for pr_warn */ @@ -7195,7 +8577,7 @@ static int bpf_object__collect_st_ops_relos(struct bpf_object *obj,  invalid_prog:  	pr_warn("struct_ops reloc %s: cannot use prog %s in sec %s with type %u attach_btf_id %u expected_attach_type %u for func ptr %s\n", -		map->name, prog->name, prog->section_name, prog->type, +		map->name, prog->name, prog->sec_name, prog->type,  		prog->attach_btf_id, prog->expected_attach_type, name);  	return -EINVAL;  } @@ -7299,7 +8681,7 @@ static int libbpf_find_attach_btf_id(struct bpf_program *prog)  {  	enum bpf_attach_type attach_type = prog->expected_attach_type;  	__u32 attach_prog_fd = prog->attach_prog_fd; -	const char *name = prog->section_name; +	const char *name = prog->sec_name;  	int i, err;  	if (!name) @@ -7640,7 +9022,7 @@ int bpf_prog_load_xattr(const struct bpf_prog_load_attr *attr,  		prog->prog_ifindex = attr->ifindex;  		prog->log_level = attr->log_level; -		prog->prog_flags = attr->prog_flags; +		prog->prog_flags |= attr->prog_flags;  		if (!first_prog)  			first_prog = prog;  	} @@ -7826,14 +9208,14 @@ struct bpf_link *bpf_program__attach_perf_event(struct bpf_program *prog,  	int prog_fd, err;  	if (pfd < 0) { -		pr_warn("program '%s': invalid perf event FD %d\n", -			bpf_program__title(prog, false), pfd); +		pr_warn("prog '%s': invalid perf event FD %d\n", +			prog->name, pfd);  		return ERR_PTR(-EINVAL);  	}  	prog_fd = bpf_program__fd(prog);  	if (prog_fd < 0) { -		pr_warn("program '%s': can't attach BPF program w/o FD (did you load it?)\n", -			bpf_program__title(prog, false)); +		pr_warn("prog '%s': can't attach BPF program w/o FD (did you load it?)\n", +			prog->name);  		return ERR_PTR(-EINVAL);  	} @@ -7846,20 +9228,18 @@ struct bpf_link *bpf_program__attach_perf_event(struct bpf_program *prog,  	if (ioctl(pfd, PERF_EVENT_IOC_SET_BPF, prog_fd) < 0) {  		err = -errno;  		free(link); -		pr_warn("program '%s': failed to attach to pfd %d: %s\n", -			bpf_program__title(prog, false), pfd, -			   libbpf_strerror_r(err, errmsg, sizeof(errmsg))); +		pr_warn("prog '%s': failed to attach to pfd %d: %s\n", +			prog->name, pfd, libbpf_strerror_r(err, 
errmsg, sizeof(errmsg)));  		if (err == -EPROTO) -			pr_warn("program '%s': try add PERF_SAMPLE_CALLCHAIN to or remove exclude_callchain_[kernel|user] from pfd %d\n", -				bpf_program__title(prog, false), pfd); +			pr_warn("prog '%s': try add PERF_SAMPLE_CALLCHAIN to or remove exclude_callchain_[kernel|user] from pfd %d\n", +				prog->name, pfd);  		return ERR_PTR(err);  	}  	if (ioctl(pfd, PERF_EVENT_IOC_ENABLE, 0) < 0) {  		err = -errno;  		free(link); -		pr_warn("program '%s': failed to enable pfd %d: %s\n", -			bpf_program__title(prog, false), pfd, -			   libbpf_strerror_r(err, errmsg, sizeof(errmsg))); +		pr_warn("prog '%s': failed to enable pfd %d: %s\n", +			prog->name, pfd, libbpf_strerror_r(err, errmsg, sizeof(errmsg)));  		return ERR_PTR(err);  	}  	return link; @@ -7981,9 +9361,8 @@ struct bpf_link *bpf_program__attach_kprobe(struct bpf_program *prog,  	pfd = perf_event_open_probe(false /* uprobe */, retprobe, func_name,  				    0 /* offset */, -1 /* pid */);  	if (pfd < 0) { -		pr_warn("program '%s': failed to create %s '%s' perf event: %s\n", -			bpf_program__title(prog, false), -			retprobe ? "kretprobe" : "kprobe", func_name, +		pr_warn("prog '%s': failed to create %s '%s' perf event: %s\n", +			prog->name, retprobe ? "kretprobe" : "kprobe", func_name,  			libbpf_strerror_r(pfd, errmsg, sizeof(errmsg)));  		return ERR_PTR(pfd);  	} @@ -7991,9 +9370,8 @@ struct bpf_link *bpf_program__attach_kprobe(struct bpf_program *prog,  	if (IS_ERR(link)) {  		close(pfd);  		err = PTR_ERR(link); -		pr_warn("program '%s': failed to attach to %s '%s': %s\n", -			bpf_program__title(prog, false), -			retprobe ? "kretprobe" : "kprobe", func_name, +		pr_warn("prog '%s': failed to attach to %s '%s': %s\n", +			prog->name, retprobe ? "kretprobe" : "kprobe", func_name,  			libbpf_strerror_r(err, errmsg, sizeof(errmsg)));  		return link;  	} @@ -8006,7 +9384,7 @@ static struct bpf_link *attach_kprobe(const struct bpf_sec_def *sec,  	const char *func_name;  	bool retprobe; -	func_name = bpf_program__title(prog, false) + sec->len; +	func_name = prog->sec_name + sec->len;  	retprobe = strcmp(sec->sec, "kretprobe/") == 0;  	return bpf_program__attach_kprobe(prog, retprobe, func_name); @@ -8024,9 +9402,8 @@ struct bpf_link *bpf_program__attach_uprobe(struct bpf_program *prog,  	pfd = perf_event_open_probe(true /* uprobe */, retprobe,  				    binary_path, func_offset, pid);  	if (pfd < 0) { -		pr_warn("program '%s': failed to create %s '%s:0x%zx' perf event: %s\n", -			bpf_program__title(prog, false), -			retprobe ? "uretprobe" : "uprobe", +		pr_warn("prog '%s': failed to create %s '%s:0x%zx' perf event: %s\n", +			prog->name, retprobe ? "uretprobe" : "uprobe",  			binary_path, func_offset,  			libbpf_strerror_r(pfd, errmsg, sizeof(errmsg)));  		return ERR_PTR(pfd); @@ -8035,9 +9412,8 @@ struct bpf_link *bpf_program__attach_uprobe(struct bpf_program *prog,  	if (IS_ERR(link)) {  		close(pfd);  		err = PTR_ERR(link); -		pr_warn("program '%s': failed to attach to %s '%s:0x%zx': %s\n", -			bpf_program__title(prog, false), -			retprobe ? "uretprobe" : "uprobe", +		pr_warn("prog '%s': failed to attach to %s '%s:0x%zx': %s\n", +			prog->name, retprobe ? 
"uretprobe" : "uprobe",  			binary_path, func_offset,  			libbpf_strerror_r(err, errmsg, sizeof(errmsg)));  		return link; @@ -8105,9 +9481,8 @@ struct bpf_link *bpf_program__attach_tracepoint(struct bpf_program *prog,  	pfd = perf_event_open_tracepoint(tp_category, tp_name);  	if (pfd < 0) { -		pr_warn("program '%s': failed to create tracepoint '%s/%s' perf event: %s\n", -			bpf_program__title(prog, false), -			tp_category, tp_name, +		pr_warn("prog '%s': failed to create tracepoint '%s/%s' perf event: %s\n", +			prog->name, tp_category, tp_name,  			libbpf_strerror_r(pfd, errmsg, sizeof(errmsg)));  		return ERR_PTR(pfd);  	} @@ -8115,9 +9490,8 @@ struct bpf_link *bpf_program__attach_tracepoint(struct bpf_program *prog,  	if (IS_ERR(link)) {  		close(pfd);  		err = PTR_ERR(link); -		pr_warn("program '%s': failed to attach to tracepoint '%s/%s': %s\n", -			bpf_program__title(prog, false), -			tp_category, tp_name, +		pr_warn("prog '%s': failed to attach to tracepoint '%s/%s': %s\n", +			prog->name, tp_category, tp_name,  			libbpf_strerror_r(err, errmsg, sizeof(errmsg)));  		return link;  	} @@ -8130,7 +9504,7 @@ static struct bpf_link *attach_tp(const struct bpf_sec_def *sec,  	char *sec_name, *tp_cat, *tp_name;  	struct bpf_link *link; -	sec_name = strdup(bpf_program__title(prog, false)); +	sec_name = strdup(prog->sec_name);  	if (!sec_name)  		return ERR_PTR(-ENOMEM); @@ -8159,8 +9533,7 @@ struct bpf_link *bpf_program__attach_raw_tracepoint(struct bpf_program *prog,  	prog_fd = bpf_program__fd(prog);  	if (prog_fd < 0) { -		pr_warn("program '%s': can't attach before loaded\n", -			bpf_program__title(prog, false)); +		pr_warn("prog '%s': can't attach before loaded\n", prog->name);  		return ERR_PTR(-EINVAL);  	} @@ -8173,9 +9546,8 @@ struct bpf_link *bpf_program__attach_raw_tracepoint(struct bpf_program *prog,  	if (pfd < 0) {  		pfd = -errno;  		free(link); -		pr_warn("program '%s': failed to attach to raw tracepoint '%s': %s\n", -			bpf_program__title(prog, false), tp_name, -			libbpf_strerror_r(pfd, errmsg, sizeof(errmsg))); +		pr_warn("prog '%s': failed to attach to raw tracepoint '%s': %s\n", +			prog->name, tp_name, libbpf_strerror_r(pfd, errmsg, sizeof(errmsg)));  		return ERR_PTR(pfd);  	}  	link->fd = pfd; @@ -8185,7 +9557,7 @@ struct bpf_link *bpf_program__attach_raw_tracepoint(struct bpf_program *prog,  static struct bpf_link *attach_raw_tp(const struct bpf_sec_def *sec,  				      struct bpf_program *prog)  { -	const char *tp_name = bpf_program__title(prog, false) + sec->len; +	const char *tp_name = prog->sec_name + sec->len;  	return bpf_program__attach_raw_tracepoint(prog, tp_name);  } @@ -8199,8 +9571,7 @@ static struct bpf_link *bpf_program__attach_btf_id(struct bpf_program *prog)  	prog_fd = bpf_program__fd(prog);  	if (prog_fd < 0) { -		pr_warn("program '%s': can't attach before loaded\n", -			bpf_program__title(prog, false)); +		pr_warn("prog '%s': can't attach before loaded\n", prog->name);  		return ERR_PTR(-EINVAL);  	} @@ -8213,9 +9584,8 @@ static struct bpf_link *bpf_program__attach_btf_id(struct bpf_program *prog)  	if (pfd < 0) {  		pfd = -errno;  		free(link); -		pr_warn("program '%s': failed to attach: %s\n", -			bpf_program__title(prog, false), -			libbpf_strerror_r(pfd, errmsg, sizeof(errmsg))); +		pr_warn("prog '%s': failed to attach: %s\n", +			prog->name, libbpf_strerror_r(pfd, errmsg, sizeof(errmsg)));  		return ERR_PTR(pfd);  	}  	link->fd = pfd; @@ -8251,9 +9621,11 @@ static struct bpf_link *attach_iter(const struct bpf_sec_def *sec,  }  static struct 
bpf_link * -bpf_program__attach_fd(struct bpf_program *prog, int target_fd, +bpf_program__attach_fd(struct bpf_program *prog, int target_fd, int btf_id,  		       const char *target_name)  { +	DECLARE_LIBBPF_OPTS(bpf_link_create_opts, opts, +			    .target_btf_id = btf_id);  	enum bpf_attach_type attach_type;  	char errmsg[STRERR_BUFSIZE];  	struct bpf_link *link; @@ -8261,8 +9633,7 @@ bpf_program__attach_fd(struct bpf_program *prog, int target_fd,  	prog_fd = bpf_program__fd(prog);  	if (prog_fd < 0) { -		pr_warn("program '%s': can't attach before loaded\n", -			bpf_program__title(prog, false)); +		pr_warn("prog '%s': can't attach before loaded\n", prog->name);  		return ERR_PTR(-EINVAL);  	} @@ -8272,12 +9643,12 @@ bpf_program__attach_fd(struct bpf_program *prog, int target_fd,  	link->detach = &bpf_link__detach_fd;  	attach_type = bpf_program__get_expected_attach_type(prog); -	link_fd = bpf_link_create(prog_fd, target_fd, attach_type, NULL); +	link_fd = bpf_link_create(prog_fd, target_fd, attach_type, &opts);  	if (link_fd < 0) {  		link_fd = -errno;  		free(link); -		pr_warn("program '%s': failed to attach to %s: %s\n", -			bpf_program__title(prog, false), target_name, +		pr_warn("prog '%s': failed to attach to %s: %s\n", +			prog->name, target_name,  			libbpf_strerror_r(link_fd, errmsg, sizeof(errmsg)));  		return ERR_PTR(link_fd);  	} @@ -8288,19 +9659,51 @@ bpf_program__attach_fd(struct bpf_program *prog, int target_fd,  struct bpf_link *  bpf_program__attach_cgroup(struct bpf_program *prog, int cgroup_fd)  { -	return bpf_program__attach_fd(prog, cgroup_fd, "cgroup"); +	return bpf_program__attach_fd(prog, cgroup_fd, 0, "cgroup");  }  struct bpf_link *  bpf_program__attach_netns(struct bpf_program *prog, int netns_fd)  { -	return bpf_program__attach_fd(prog, netns_fd, "netns"); +	return bpf_program__attach_fd(prog, netns_fd, 0, "netns");  }  struct bpf_link *bpf_program__attach_xdp(struct bpf_program *prog, int ifindex)  {  	/* target_fd/target_ifindex use the same field in LINK_CREATE */ -	return bpf_program__attach_fd(prog, ifindex, "xdp"); +	return bpf_program__attach_fd(prog, ifindex, 0, "xdp"); +} + +struct bpf_link *bpf_program__attach_freplace(struct bpf_program *prog, +					      int target_fd, +					      const char *attach_func_name) +{ +	int btf_id; + +	if (!!target_fd != !!attach_func_name) { +		pr_warn("prog '%s': supply none or both of target_fd and attach_func_name\n", +			prog->name); +		return ERR_PTR(-EINVAL); +	} + +	if (prog->type != BPF_PROG_TYPE_EXT) { +		pr_warn("prog '%s': only BPF_PROG_TYPE_EXT can attach as freplace", +			prog->name); +		return ERR_PTR(-EINVAL); +	} + +	if (target_fd) { +		btf_id = libbpf_find_prog_btf_id(attach_func_name, target_fd); +		if (btf_id < 0) +			return ERR_PTR(btf_id); + +		return bpf_program__attach_fd(prog, target_fd, btf_id, "freplace"); +	} else { +		/* no target, so use raw_tracepoint_open for compatibility +		 * with old kernels +		 */ +		return bpf_program__attach_trace(prog); +	}  }  struct bpf_link * @@ -8321,8 +9724,7 @@ bpf_program__attach_iter(struct bpf_program *prog,  	prog_fd = bpf_program__fd(prog);  	if (prog_fd < 0) { -		pr_warn("program '%s': can't attach before loaded\n", -			bpf_program__title(prog, false)); +		pr_warn("prog '%s': can't attach before loaded\n", prog->name);  		return ERR_PTR(-EINVAL);  	} @@ -8336,9 +9738,8 @@ bpf_program__attach_iter(struct bpf_program *prog,  	if (link_fd < 0) {  		link_fd = -errno;  		free(link); -		pr_warn("program '%s': failed to attach to iterator: %s\n", -			
bpf_program__title(prog, false), -			libbpf_strerror_r(link_fd, errmsg, sizeof(errmsg))); +		pr_warn("prog '%s': failed to attach to iterator: %s\n", +			prog->name, libbpf_strerror_r(link_fd, errmsg, sizeof(errmsg)));  		return ERR_PTR(link_fd);  	}  	link->fd = link_fd; @@ -8349,7 +9750,7 @@ struct bpf_link *bpf_program__attach(struct bpf_program *prog)  {  	const struct bpf_sec_def *sec_def; -	sec_def = find_sec_def(bpf_program__title(prog, false)); +	sec_def = find_sec_def(prog->sec_name);  	if (!sec_def || !sec_def->attach_fn)  		return ERR_PTR(-ESRCH); @@ -8594,7 +9995,7 @@ struct perf_buffer *perf_buffer__new(int map_fd, size_t page_cnt,  	struct perf_buffer_params p = {};  	struct perf_event_attr attr = { 0, }; -	attr.config = PERF_COUNT_SW_BPF_OUTPUT, +	attr.config = PERF_COUNT_SW_BPF_OUTPUT;  	attr.type = PERF_TYPE_SOFTWARE;  	attr.sample_type = PERF_SAMPLE_RAW;  	attr.sample_period = 1; @@ -8832,6 +10233,11 @@ static int perf_buffer__process_records(struct perf_buffer *pb,  	return 0;  } +int perf_buffer__epoll_fd(const struct perf_buffer *pb) +{ +	return pb->epoll_fd; +} +  int perf_buffer__poll(struct perf_buffer *pb, int timeout_ms)  {  	int i, cnt, err; @@ -8849,6 +10255,55 @@ int perf_buffer__poll(struct perf_buffer *pb, int timeout_ms)  	return cnt < 0 ? -errno : cnt;  } +/* Return number of PERF_EVENT_ARRAY map slots set up by this perf_buffer + * manager. + */ +size_t perf_buffer__buffer_cnt(const struct perf_buffer *pb) +{ +	return pb->cpu_cnt; +} + +/* + * Return perf_event FD of a ring buffer in *buf_idx* slot of + * PERF_EVENT_ARRAY BPF map. This FD can be polled for new data using + * select()/poll()/epoll() Linux syscalls. + */ +int perf_buffer__buffer_fd(const struct perf_buffer *pb, size_t buf_idx) +{ +	struct perf_cpu_buf *cpu_buf; + +	if (buf_idx >= pb->cpu_cnt) +		return -EINVAL; + +	cpu_buf = pb->cpu_bufs[buf_idx]; +	if (!cpu_buf) +		return -ENOENT; + +	return cpu_buf->fd; +} + +/* + * Consume data from perf ring buffer corresponding to slot *buf_idx* in + * PERF_EVENT_ARRAY BPF map without waiting/polling. If there is no data to + * consume, do nothing and return success. + * Returns: + *   - 0 on success; + *   - <0 on failure. 
+ */ +int perf_buffer__consume_buffer(struct perf_buffer *pb, size_t buf_idx) +{ +	struct perf_cpu_buf *cpu_buf; + +	if (buf_idx >= pb->cpu_cnt) +		return -EINVAL; + +	cpu_buf = pb->cpu_bufs[buf_idx]; +	if (!cpu_buf) +		return -ENOENT; + +	return perf_buffer__process_records(pb, cpu_buf); +} +  int perf_buffer__consume(struct perf_buffer *pb)  {  	int i, err; @@ -8861,7 +10316,7 @@ int perf_buffer__consume(struct perf_buffer *pb)  		err = perf_buffer__process_records(pb, cpu_buf);  		if (err) { -			pr_warn("error while processing records: %d\n", err); +			pr_warn("perf_buffer: failed to process records in buffer #%d: %d\n", i, err);  			return err;  		}  	} @@ -9129,9 +10584,8 @@ int bpf_program__set_attach_target(struct bpf_program *prog,  		btf_id = libbpf_find_prog_btf_id(attach_func_name,  						 attach_prog_fd);  	else -		btf_id = __find_vmlinux_btf_id(prog->obj->btf_vmlinux, -					       attach_func_name, -					       prog->expected_attach_type); +		btf_id = libbpf_find_vmlinux_btf_id(attach_func_name, +						    prog->expected_attach_type);  	if (btf_id < 0)  		return btf_id; @@ -9365,12 +10819,11 @@ int bpf_object__attach_skeleton(struct bpf_object_skeleton *s)  		struct bpf_program *prog = *s->progs[i].prog;  		struct bpf_link **link = s->progs[i].link;  		const struct bpf_sec_def *sec_def; -		const char *sec_name = bpf_program__title(prog, false);  		if (!prog->load)  			continue; -		sec_def = find_sec_def(sec_name); +		sec_def = find_sec_def(prog->sec_name);  		if (!sec_def || !sec_def->attach_fn)  			continue; diff --git a/tools/lib/bpf/libbpf.h b/tools/lib/bpf/libbpf.h index 5ecb4069a9f0..6909ee81113a 100644 --- a/tools/lib/bpf/libbpf.h +++ b/tools/lib/bpf/libbpf.h @@ -198,8 +198,9 @@ LIBBPF_API void bpf_program__set_ifindex(struct bpf_program *prog,  					 __u32 ifindex);  LIBBPF_API const char *bpf_program__name(const struct bpf_program *prog); -LIBBPF_API const char *bpf_program__title(const struct bpf_program *prog, -					  bool needs_copy); +LIBBPF_API const char *bpf_program__section_name(const struct bpf_program *prog); +LIBBPF_API LIBBPF_DEPRECATED("BPF program title is confusing term; please use bpf_program__section_name() instead") +const char *bpf_program__title(const struct bpf_program *prog, bool needs_copy);  LIBBPF_API bool bpf_program__autoload(const struct bpf_program *prog);  LIBBPF_API int bpf_program__set_autoload(struct bpf_program *prog, bool autoload); @@ -260,6 +261,9 @@ LIBBPF_API struct bpf_link *  bpf_program__attach_netns(struct bpf_program *prog, int netns_fd);  LIBBPF_API struct bpf_link *  bpf_program__attach_xdp(struct bpf_program *prog, int ifindex); +LIBBPF_API struct bpf_link * +bpf_program__attach_freplace(struct bpf_program *prog, +			     int target_fd, const char *attach_func_name);  struct bpf_map; @@ -588,8 +592,12 @@ perf_buffer__new_raw(int map_fd, size_t page_cnt,  		     const struct perf_buffer_raw_opts *opts);  LIBBPF_API void perf_buffer__free(struct perf_buffer *pb); +LIBBPF_API int perf_buffer__epoll_fd(const struct perf_buffer *pb);  LIBBPF_API int perf_buffer__poll(struct perf_buffer *pb, int timeout_ms);  LIBBPF_API int perf_buffer__consume(struct perf_buffer *pb); +LIBBPF_API int perf_buffer__consume_buffer(struct perf_buffer *pb, size_t buf_idx); +LIBBPF_API size_t perf_buffer__buffer_cnt(const struct perf_buffer *pb); +LIBBPF_API int perf_buffer__buffer_fd(const struct perf_buffer *pb, size_t buf_idx);  typedef enum bpf_perf_event_ret  	(*bpf_perf_event_print_t)(struct perf_event_header *hdr, diff --git 
a/tools/lib/bpf/libbpf.map b/tools/lib/bpf/libbpf.map index e35bd6cdbdbf..4ebfadf45b47 100644 --- a/tools/lib/bpf/libbpf.map +++ b/tools/lib/bpf/libbpf.map @@ -299,3 +299,41 @@ LIBBPF_0.1.0 {  		btf__set_fd;  		btf__set_pointer_size;  } LIBBPF_0.0.9; + +LIBBPF_0.2.0 { +	global: +		bpf_prog_bind_map; +		bpf_prog_test_run_opts; +		bpf_program__attach_freplace; +		bpf_program__section_name; +		btf__add_array; +		btf__add_const; +		btf__add_enum; +		btf__add_enum_value; +		btf__add_datasec; +		btf__add_datasec_var_info; +		btf__add_field; +		btf__add_func; +		btf__add_func_param; +		btf__add_func_proto; +		btf__add_fwd; +		btf__add_int; +		btf__add_ptr; +		btf__add_restrict; +		btf__add_str; +		btf__add_struct; +		btf__add_typedef; +		btf__add_union; +		btf__add_var; +		btf__add_volatile; +		btf__endianness; +		btf__find_str; +		btf__new_empty; +		btf__set_endianness; +		btf__str_by_offset; +		perf_buffer__buffer_cnt; +		perf_buffer__buffer_fd; +		perf_buffer__epoll_fd; +		perf_buffer__consume_buffer; +		xsk_socket__create_shared; +} LIBBPF_0.1.0; diff --git a/tools/lib/bpf/libbpf_common.h b/tools/lib/bpf/libbpf_common.h index a23ae1ac27eb..947d8bd8a7bb 100644 --- a/tools/lib/bpf/libbpf_common.h +++ b/tools/lib/bpf/libbpf_common.h @@ -15,6 +15,8 @@  #define LIBBPF_API __attribute__((visibility("default")))  #endif +#define LIBBPF_DEPRECATED(msg) __attribute__((deprecated(msg))) +  /* Helper macro to declare and initialize libbpf options struct   *   * This dance with uninitialized declaration, followed by memset to zero, diff --git a/tools/lib/bpf/libbpf_internal.h b/tools/lib/bpf/libbpf_internal.h index 50d70e90d5f1..d99bc847bf84 100644 --- a/tools/lib/bpf/libbpf_internal.h +++ b/tools/lib/bpf/libbpf_internal.h @@ -9,6 +9,15 @@  #ifndef __LIBBPF_LIBBPF_INTERNAL_H  #define __LIBBPF_LIBBPF_INTERNAL_H +#include <stdlib.h> +#include <limits.h> + +/* make sure libbpf doesn't use kernel-only integer typedefs */ +#pragma GCC poison u8 u16 u32 u64 s8 s16 s32 s64 + +/* prevent accidental re-addition of reallocarray() */ +#pragma GCC poison reallocarray +  #include "libbpf.h"  #define BTF_INFO_ENC(kind, kind_flag, vlen) \ @@ -23,6 +32,12 @@  #define BTF_PARAM_ENC(name, type) (name), (type)  #define BTF_VAR_SECINFO_ENC(type, offset, size) (type), (offset), (size) +#ifndef likely +#define likely(x) __builtin_expect(!!(x), 1) +#endif +#ifndef unlikely +#define unlikely(x) __builtin_expect(!!(x), 0) +#endif  #ifndef min  # define min(x, y) ((x) < (y) ? (x) : (y))  #endif @@ -63,6 +78,37 @@ do {				\  #define pr_info(fmt, ...)	__pr(LIBBPF_INFO, fmt, ##__VA_ARGS__)  #define pr_debug(fmt, ...)	__pr(LIBBPF_DEBUG, fmt, ##__VA_ARGS__) +#ifndef __has_builtin +#define __has_builtin(x) 0 +#endif +/* + * Re-implement glibc's reallocarray() for libbpf internal-only use. + * reallocarray(), unfortunately, is not available in all versions of glibc, + * so requires extra feature detection and using reallocarray() stub from + * <tools/libc_compat.h> and COMPAT_NEED_REALLOCARRAY. All this complicates + * build of libbpf unnecessarily and is just a maintenance burden. Instead, + * it's trivial to implement libbpf-specific internal version and use it + * throughout libbpf. 
+ */ +static inline void *libbpf_reallocarray(void *ptr, size_t nmemb, size_t size) +{ +	size_t total; + +#if __has_builtin(__builtin_mul_overflow) +	if (unlikely(__builtin_mul_overflow(nmemb, size, &total))) +		return NULL; +#else +	if (size == 0 || nmemb > ULONG_MAX / size) +		return NULL; +	total = nmemb * size; +#endif +	return realloc(ptr, total); +} + +void *btf_add_mem(void **data, size_t *cap_cnt, size_t elem_sz, +		  size_t cur_cnt, size_t max_cnt, size_t add_cnt); +int btf_ensure_mem(void **data, size_t *cap_cnt, size_t elem_sz, size_t need_cnt); +  static inline bool libbpf_validate_opts(const char *opts,  					size_t opts_sz, size_t user_sz,  					const char *type_name) @@ -94,6 +140,11 @@ static inline bool libbpf_validate_opts(const char *opts,  	((opts) && opts->sz >= offsetofend(typeof(*(opts)), field))  #define OPTS_GET(opts, field, fallback_value) \  	(OPTS_HAS(opts, field) ? (opts)->field : fallback_value) +#define OPTS_SET(opts, field, value)		\ +	do {					\ +		if (OPTS_HAS(opts, field))	\ +			(opts)->field = value;	\ +	} while (0)  int parse_cpu_mask_str(const char *s, bool **mask, int *mask_sz);  int parse_cpu_mask_file(const char *fcpu, bool **mask, int *mask_sz); @@ -105,18 +156,6 @@ int bpf_object__section_size(const struct bpf_object *obj, const char *name,  int bpf_object__variable_offset(const struct bpf_object *obj, const char *name,  				__u32 *off); -struct nlattr; -typedef int (*libbpf_dump_nlmsg_t)(void *cookie, void *msg, struct nlattr **tb); -int libbpf_netlink_open(unsigned int *nl_pid); -int libbpf_nl_get_link(int sock, unsigned int nl_pid, -		       libbpf_dump_nlmsg_t dump_link_nlmsg, void *cookie); -int libbpf_nl_get_class(int sock, unsigned int nl_pid, int ifindex, -			libbpf_dump_nlmsg_t dump_class_nlmsg, void *cookie); -int libbpf_nl_get_qdisc(int sock, unsigned int nl_pid, int ifindex, -			libbpf_dump_nlmsg_t dump_qdisc_nlmsg, void *cookie); -int libbpf_nl_get_filter(int sock, unsigned int nl_pid, int ifindex, int handle, -			 libbpf_dump_nlmsg_t dump_filter_nlmsg, void *cookie); -  struct btf_ext_info {  	/*  	 * info points to the individual info section (e.g. func_info and @@ -138,6 +177,44 @@ struct btf_ext_info {  	     i < (sec)->num_info;					\  	     i++, rec = (void *)rec + (seg)->rec_size) +/* + * The .BTF.ext ELF section layout defined as + *   struct btf_ext_header + *   func_info subsection + * + * The func_info subsection layout: + *   record size for struct bpf_func_info in the func_info subsection + *   struct btf_sec_func_info for section #1 + *   a list of bpf_func_info records for section #1 + *     where struct bpf_func_info mimics one in include/uapi/linux/bpf.h + *     but may not be identical + *   struct btf_sec_func_info for section #2 + *   a list of bpf_func_info records for section #2 + *   ...... + * + * Note that the bpf_func_info record size in .BTF.ext may not + * be the same as the one defined in include/uapi/linux/bpf.h. + * The loader should ensure that record_size meets minimum + * requirement and pass the record as is to the kernel. The + * kernel will handle the func_info properly based on its contents. 
+ */ +struct btf_ext_header { +	__u16	magic; +	__u8	version; +	__u8	flags; +	__u32	hdr_len; + +	/* All offsets are in bytes relative to the end of this header */ +	__u32	func_info_off; +	__u32	func_info_len; +	__u32	line_info_off; +	__u32	line_info_len; + +	/* optional part of .BTF.ext header */ +	__u32	core_relo_off; +	__u32	core_relo_len; +}; +  struct btf_ext {  	union {  		struct btf_ext_header *hdr; @@ -145,7 +222,7 @@ struct btf_ext {  	};  	struct btf_ext_info func_info;  	struct btf_ext_info line_info; -	struct btf_ext_info field_reloc_info; +	struct btf_ext_info core_relo_info;  	__u32 data_size;  }; @@ -170,32 +247,40 @@ struct bpf_line_info_min {  	__u32	line_col;  }; -/* bpf_field_info_kind encodes which aspect of captured field has to be - * adjusted by relocations. Currently supported values are: - *   - BPF_FIELD_BYTE_OFFSET: field offset (in bytes); - *   - BPF_FIELD_EXISTS: field existence (1, if field exists; 0, otherwise); +/* bpf_core_relo_kind encodes which aspect of captured field/type/enum value + * has to be adjusted by relocations.   */ -enum bpf_field_info_kind { +enum bpf_core_relo_kind {  	BPF_FIELD_BYTE_OFFSET = 0,	/* field byte offset */ -	BPF_FIELD_BYTE_SIZE = 1, +	BPF_FIELD_BYTE_SIZE = 1,	/* field size in bytes */  	BPF_FIELD_EXISTS = 2,		/* field existence in target kernel */ -	BPF_FIELD_SIGNED = 3, -	BPF_FIELD_LSHIFT_U64 = 4, -	BPF_FIELD_RSHIFT_U64 = 5, +	BPF_FIELD_SIGNED = 3,		/* field signedness (0 - unsigned, 1 - signed) */ +	BPF_FIELD_LSHIFT_U64 = 4,	/* bitfield-specific left bitshift */ +	BPF_FIELD_RSHIFT_U64 = 5,	/* bitfield-specific right bitshift */ +	BPF_TYPE_ID_LOCAL = 6,		/* type ID in local BPF object */ +	BPF_TYPE_ID_TARGET = 7,		/* type ID in target kernel */ +	BPF_TYPE_EXISTS = 8,		/* type existence in target kernel */ +	BPF_TYPE_SIZE = 9,		/* type size in bytes */ +	BPF_ENUMVAL_EXISTS = 10,	/* enum value existence in target kernel */ +	BPF_ENUMVAL_VALUE = 11,		/* enum value integer value */  }; -/* The minimum bpf_field_reloc checked by the loader +/* The minimum bpf_core_relo checked by the loader   * - * Field relocation captures the following data: + * CO-RE relocation captures the following data:   * - insn_off - instruction offset (in bytes) within a BPF program that needs   *   its insn->imm field to be relocated with actual field info;   * - type_id - BTF type ID of the "root" (containing) entity of a relocatable - *   field; + *   type or field;   * - access_str_off - offset into corresponding .BTF string section. String - *   itself encodes an accessed field using a sequence of field and array - *   indicies, separated by colon (:). It's conceptually very close to LLVM's - *   getelementptr ([0]) instruction's arguments for identifying offset to  - *   a field. + *   interpretation depends on specific relocation kind: + *     - for field-based relocations, string encodes an accessed field using + *     a sequence of field and array indices, separated by colon (:). It's + *     conceptually very close to LLVM's getelementptr ([0]) instruction's + *     arguments for identifying offset to a field. + *     - for type-based relocations, strings is expected to be just "0"; + *     - for enum value-based relocations, string contains an index of enum + *     value within its enum type;   *   * Example to provide a better feel.   
* @@ -226,11 +311,11 @@ enum bpf_field_info_kind {   *   *   [0] https://llvm.org/docs/LangRef.html#getelementptr-instruction   */ -struct bpf_field_reloc { +struct bpf_core_relo {  	__u32   insn_off;  	__u32   type_id;  	__u32   access_str_off; -	enum bpf_field_info_kind kind; +	enum bpf_core_relo_kind kind;  };  #endif /* __LIBBPF_LIBBPF_INTERNAL_H */ diff --git a/tools/lib/bpf/libbpf_probes.c b/tools/lib/bpf/libbpf_probes.c index 5a3d3f078408..5482a9b7ae2d 100644 --- a/tools/lib/bpf/libbpf_probes.c +++ b/tools/lib/bpf/libbpf_probes.c @@ -17,9 +17,6 @@  #include "libbpf.h"  #include "libbpf_internal.h" -/* make sure libbpf doesn't use kernel-only integer typedefs */ -#pragma GCC poison u8 u16 u32 u64 s8 s16 s32 s64 -  static bool grep(const char *buffer, const char *pattern)  {  	return !!strstr(buffer, pattern); @@ -173,7 +170,7 @@ int libbpf__load_raw_btf(const char *raw_types, size_t types_len,  	return btf_fd;  } -static int load_sk_storage_btf(void) +static int load_local_storage_btf(void)  {  	const char strs[] = "\0bpf_spin_lock\0val\0cnt\0l";  	/* struct bpf_spin_lock { @@ -232,12 +229,13 @@ bool bpf_probe_map_type(enum bpf_map_type map_type, __u32 ifindex)  		key_size	= 0;  		break;  	case BPF_MAP_TYPE_SK_STORAGE: +	case BPF_MAP_TYPE_INODE_STORAGE:  		btf_key_type_id = 1;  		btf_value_type_id = 3;  		value_size = 8;  		max_entries = 0;  		map_flags = BPF_F_NO_PREALLOC; -		btf_fd = load_sk_storage_btf(); +		btf_fd = load_local_storage_btf();  		if (btf_fd < 0)  			return false;  		break; diff --git a/tools/lib/bpf/netlink.c b/tools/lib/bpf/netlink.c index 312f887570b2..4dd73de00b6f 100644 --- a/tools/lib/bpf/netlink.c +++ b/tools/lib/bpf/netlink.c @@ -15,13 +15,12 @@  #include "libbpf_internal.h"  #include "nlattr.h" -/* make sure libbpf doesn't use kernel-only integer typedefs */ -#pragma GCC poison u8 u16 u32 u64 s8 s16 s32 s64 -  #ifndef SOL_NETLINK  #define SOL_NETLINK 270  #endif +typedef int (*libbpf_dump_nlmsg_t)(void *cookie, void *msg, struct nlattr **tb); +  typedef int (*__dump_nlmsg_t)(struct nlmsghdr *nlmsg, libbpf_dump_nlmsg_t,  			      void *cookie); @@ -31,7 +30,7 @@ struct xdp_id_md {  	struct xdp_link_info info;  }; -int libbpf_netlink_open(__u32 *nl_pid) +static int libbpf_netlink_open(__u32 *nl_pid)  {  	struct sockaddr_nl sa;  	socklen_t addrlen; @@ -283,6 +282,9 @@ static int get_xdp_info(void *cookie, void *msg, struct nlattr **tb)  	return 0;  } +static int libbpf_nl_get_link(int sock, unsigned int nl_pid, +			      libbpf_dump_nlmsg_t dump_link_nlmsg, void *cookie); +  int bpf_get_link_xdp_info(int ifindex, struct xdp_link_info *info,  			  size_t info_size, __u32 flags)  { @@ -368,121 +370,3 @@ int libbpf_nl_get_link(int sock, unsigned int nl_pid,  	return bpf_netlink_recv(sock, nl_pid, seq, __dump_link_nlmsg,  				dump_link_nlmsg, cookie);  } - -static int __dump_class_nlmsg(struct nlmsghdr *nlh, -			      libbpf_dump_nlmsg_t dump_class_nlmsg, -			      void *cookie) -{ -	struct nlattr *tb[TCA_MAX + 1], *attr; -	struct tcmsg *t = NLMSG_DATA(nlh); -	int len; - -	len = nlh->nlmsg_len - NLMSG_LENGTH(sizeof(*t)); -	attr = (struct nlattr *) ((void *) t + NLMSG_ALIGN(sizeof(*t))); -	if (libbpf_nla_parse(tb, TCA_MAX, attr, len, NULL) != 0) -		return -LIBBPF_ERRNO__NLPARSE; - -	return dump_class_nlmsg(cookie, t, tb); -} - -int libbpf_nl_get_class(int sock, unsigned int nl_pid, int ifindex, -			libbpf_dump_nlmsg_t dump_class_nlmsg, void *cookie) -{ -	struct { -		struct nlmsghdr nlh; -		struct tcmsg t; -	} req = { -		.nlh.nlmsg_len = NLMSG_LENGTH(sizeof(struct 
tcmsg)), -		.nlh.nlmsg_type = RTM_GETTCLASS, -		.nlh.nlmsg_flags = NLM_F_DUMP | NLM_F_REQUEST, -		.t.tcm_family = AF_UNSPEC, -		.t.tcm_ifindex = ifindex, -	}; -	int seq = time(NULL); - -	req.nlh.nlmsg_seq = seq; -	if (send(sock, &req, req.nlh.nlmsg_len, 0) < 0) -		return -errno; - -	return bpf_netlink_recv(sock, nl_pid, seq, __dump_class_nlmsg, -				dump_class_nlmsg, cookie); -} - -static int __dump_qdisc_nlmsg(struct nlmsghdr *nlh, -			      libbpf_dump_nlmsg_t dump_qdisc_nlmsg, -			      void *cookie) -{ -	struct nlattr *tb[TCA_MAX + 1], *attr; -	struct tcmsg *t = NLMSG_DATA(nlh); -	int len; - -	len = nlh->nlmsg_len - NLMSG_LENGTH(sizeof(*t)); -	attr = (struct nlattr *) ((void *) t + NLMSG_ALIGN(sizeof(*t))); -	if (libbpf_nla_parse(tb, TCA_MAX, attr, len, NULL) != 0) -		return -LIBBPF_ERRNO__NLPARSE; - -	return dump_qdisc_nlmsg(cookie, t, tb); -} - -int libbpf_nl_get_qdisc(int sock, unsigned int nl_pid, int ifindex, -			libbpf_dump_nlmsg_t dump_qdisc_nlmsg, void *cookie) -{ -	struct { -		struct nlmsghdr nlh; -		struct tcmsg t; -	} req = { -		.nlh.nlmsg_len = NLMSG_LENGTH(sizeof(struct tcmsg)), -		.nlh.nlmsg_type = RTM_GETQDISC, -		.nlh.nlmsg_flags = NLM_F_DUMP | NLM_F_REQUEST, -		.t.tcm_family = AF_UNSPEC, -		.t.tcm_ifindex = ifindex, -	}; -	int seq = time(NULL); - -	req.nlh.nlmsg_seq = seq; -	if (send(sock, &req, req.nlh.nlmsg_len, 0) < 0) -		return -errno; - -	return bpf_netlink_recv(sock, nl_pid, seq, __dump_qdisc_nlmsg, -				dump_qdisc_nlmsg, cookie); -} - -static int __dump_filter_nlmsg(struct nlmsghdr *nlh, -			       libbpf_dump_nlmsg_t dump_filter_nlmsg, -			       void *cookie) -{ -	struct nlattr *tb[TCA_MAX + 1], *attr; -	struct tcmsg *t = NLMSG_DATA(nlh); -	int len; - -	len = nlh->nlmsg_len - NLMSG_LENGTH(sizeof(*t)); -	attr = (struct nlattr *) ((void *) t + NLMSG_ALIGN(sizeof(*t))); -	if (libbpf_nla_parse(tb, TCA_MAX, attr, len, NULL) != 0) -		return -LIBBPF_ERRNO__NLPARSE; - -	return dump_filter_nlmsg(cookie, t, tb); -} - -int libbpf_nl_get_filter(int sock, unsigned int nl_pid, int ifindex, int handle, -			 libbpf_dump_nlmsg_t dump_filter_nlmsg, void *cookie) -{ -	struct { -		struct nlmsghdr nlh; -		struct tcmsg t; -	} req = { -		.nlh.nlmsg_len = NLMSG_LENGTH(sizeof(struct tcmsg)), -		.nlh.nlmsg_type = RTM_GETTFILTER, -		.nlh.nlmsg_flags = NLM_F_DUMP | NLM_F_REQUEST, -		.t.tcm_family = AF_UNSPEC, -		.t.tcm_ifindex = ifindex, -		.t.tcm_parent = handle, -	}; -	int seq = time(NULL); - -	req.nlh.nlmsg_seq = seq; -	if (send(sock, &req, req.nlh.nlmsg_len, 0) < 0) -		return -errno; - -	return bpf_netlink_recv(sock, nl_pid, seq, __dump_filter_nlmsg, -				dump_filter_nlmsg, cookie); -} diff --git a/tools/lib/bpf/nlattr.c b/tools/lib/bpf/nlattr.c index 0ad41dfea8eb..b607fa9852b1 100644 --- a/tools/lib/bpf/nlattr.c +++ b/tools/lib/bpf/nlattr.c @@ -7,14 +7,11 @@   */  #include <errno.h> -#include "nlattr.h" -#include "libbpf_internal.h" -#include <linux/rtnetlink.h>  #include <string.h>  #include <stdio.h> - -/* make sure libbpf doesn't use kernel-only integer typedefs */ -#pragma GCC poison u8 u16 u32 u64 s8 s16 s32 s64 +#include <linux/rtnetlink.h> +#include "nlattr.h" +#include "libbpf_internal.h"  static uint16_t nla_attr_minlen[LIBBPF_NLA_TYPE_MAX+1] = {  	[LIBBPF_NLA_U8]		= sizeof(uint8_t), diff --git a/tools/lib/bpf/ringbuf.c b/tools/lib/bpf/ringbuf.c index 4fc6c6cbb4eb..5c6522c89af1 100644 --- a/tools/lib/bpf/ringbuf.c +++ b/tools/lib/bpf/ringbuf.c @@ -16,15 +16,11 @@  #include <asm/barrier.h>  #include <sys/mman.h>  #include <sys/epoll.h> -#include <tools/libc_compat.h>  
#include "libbpf.h"  #include "libbpf_internal.h"  #include "bpf.h" -/* make sure libbpf doesn't use kernel-only integer typedefs */ -#pragma GCC poison u8 u16 u32 u64 s8 s16 s32 s64 -  struct ring {  	ring_buffer_sample_fn sample_cb;  	void *ctx; @@ -82,12 +78,12 @@ int ring_buffer__add(struct ring_buffer *rb, int map_fd,  		return -EINVAL;  	} -	tmp = reallocarray(rb->rings, rb->ring_cnt + 1, sizeof(*rb->rings)); +	tmp = libbpf_reallocarray(rb->rings, rb->ring_cnt + 1, sizeof(*rb->rings));  	if (!tmp)  		return -ENOMEM;  	rb->rings = tmp; -	tmp = reallocarray(rb->events, rb->ring_cnt + 1, sizeof(*rb->events)); +	tmp = libbpf_reallocarray(rb->events, rb->ring_cnt + 1, sizeof(*rb->events));  	if (!tmp)  		return -ENOMEM;  	rb->events = tmp; diff --git a/tools/lib/bpf/xsk.c b/tools/lib/bpf/xsk.c index f7f4efb70a4c..e3c98c007825 100644 --- a/tools/lib/bpf/xsk.c +++ b/tools/lib/bpf/xsk.c @@ -20,6 +20,8 @@  #include <linux/if_ether.h>  #include <linux/if_packet.h>  #include <linux/if_xdp.h> +#include <linux/kernel.h> +#include <linux/list.h>  #include <linux/sockios.h>  #include <net/if.h>  #include <sys/ioctl.h> @@ -32,9 +34,6 @@  #include "libbpf_internal.h"  #include "xsk.h" -/* make sure libbpf doesn't use kernel-only integer typedefs */ -#pragma GCC poison u8 u16 u32 u64 s8 s16 s32 s64 -  #ifndef SOL_XDP   #define SOL_XDP 283  #endif @@ -48,26 +47,35 @@  #endif  struct xsk_umem { -	struct xsk_ring_prod *fill; -	struct xsk_ring_cons *comp; +	struct xsk_ring_prod *fill_save; +	struct xsk_ring_cons *comp_save;  	char *umem_area;  	struct xsk_umem_config config;  	int fd;  	int refcount; +	struct list_head ctx_list; +}; + +struct xsk_ctx { +	struct xsk_ring_prod *fill; +	struct xsk_ring_cons *comp; +	__u32 queue_id; +	struct xsk_umem *umem; +	int refcount; +	int ifindex; +	struct list_head list; +	int prog_fd; +	int xsks_map_fd; +	char ifname[IFNAMSIZ];  };  struct xsk_socket {  	struct xsk_ring_cons *rx;  	struct xsk_ring_prod *tx;  	__u64 outstanding_tx; -	struct xsk_umem *umem; +	struct xsk_ctx *ctx;  	struct xsk_socket_config config;  	int fd; -	int ifindex; -	int prog_fd; -	int xsks_map_fd; -	__u32 queue_id; -	char ifname[IFNAMSIZ];  };  struct xsk_nl_info { @@ -203,15 +211,73 @@ static int xsk_get_mmap_offsets(int fd, struct xdp_mmap_offsets *off)  	return -EINVAL;  } +static int xsk_create_umem_rings(struct xsk_umem *umem, int fd, +				 struct xsk_ring_prod *fill, +				 struct xsk_ring_cons *comp) +{ +	struct xdp_mmap_offsets off; +	void *map; +	int err; + +	err = setsockopt(fd, SOL_XDP, XDP_UMEM_FILL_RING, +			 &umem->config.fill_size, +			 sizeof(umem->config.fill_size)); +	if (err) +		return -errno; + +	err = setsockopt(fd, SOL_XDP, XDP_UMEM_COMPLETION_RING, +			 &umem->config.comp_size, +			 sizeof(umem->config.comp_size)); +	if (err) +		return -errno; + +	err = xsk_get_mmap_offsets(fd, &off); +	if (err) +		return -errno; + +	map = mmap(NULL, off.fr.desc + umem->config.fill_size * sizeof(__u64), +		   PROT_READ | PROT_WRITE, MAP_SHARED | MAP_POPULATE, fd, +		   XDP_UMEM_PGOFF_FILL_RING); +	if (map == MAP_FAILED) +		return -errno; + +	fill->mask = umem->config.fill_size - 1; +	fill->size = umem->config.fill_size; +	fill->producer = map + off.fr.producer; +	fill->consumer = map + off.fr.consumer; +	fill->flags = map + off.fr.flags; +	fill->ring = map + off.fr.desc; +	fill->cached_cons = umem->config.fill_size; + +	map = mmap(NULL, off.cr.desc + umem->config.comp_size * sizeof(__u64), +		   PROT_READ | PROT_WRITE, MAP_SHARED | MAP_POPULATE, fd, +		   XDP_UMEM_PGOFF_COMPLETION_RING); +	
if (map == MAP_FAILED) { +		err = -errno; +		goto out_mmap; +	} + +	comp->mask = umem->config.comp_size - 1; +	comp->size = umem->config.comp_size; +	comp->producer = map + off.cr.producer; +	comp->consumer = map + off.cr.consumer; +	comp->flags = map + off.cr.flags; +	comp->ring = map + off.cr.desc; + +	return 0; + +out_mmap: +	munmap(map, off.fr.desc + umem->config.fill_size * sizeof(__u64)); +	return err; +} +  int xsk_umem__create_v0_0_4(struct xsk_umem **umem_ptr, void *umem_area,  			    __u64 size, struct xsk_ring_prod *fill,  			    struct xsk_ring_cons *comp,  			    const struct xsk_umem_config *usr_config)  { -	struct xdp_mmap_offsets off;  	struct xdp_umem_reg mr;  	struct xsk_umem *umem; -	void *map;  	int err;  	if (!umem_area || !umem_ptr || !fill || !comp) @@ -230,6 +296,7 @@ int xsk_umem__create_v0_0_4(struct xsk_umem **umem_ptr, void *umem_area,  	}  	umem->umem_area = umem_area; +	INIT_LIST_HEAD(&umem->ctx_list);  	xsk_set_umem_config(&umem->config, usr_config);  	memset(&mr, 0, sizeof(mr)); @@ -244,71 +311,16 @@ int xsk_umem__create_v0_0_4(struct xsk_umem **umem_ptr, void *umem_area,  		err = -errno;  		goto out_socket;  	} -	err = setsockopt(umem->fd, SOL_XDP, XDP_UMEM_FILL_RING, -			 &umem->config.fill_size, -			 sizeof(umem->config.fill_size)); -	if (err) { -		err = -errno; -		goto out_socket; -	} -	err = setsockopt(umem->fd, SOL_XDP, XDP_UMEM_COMPLETION_RING, -			 &umem->config.comp_size, -			 sizeof(umem->config.comp_size)); -	if (err) { -		err = -errno; -		goto out_socket; -	} -	err = xsk_get_mmap_offsets(umem->fd, &off); -	if (err) { -		err = -errno; -		goto out_socket; -	} - -	map = mmap(NULL, off.fr.desc + umem->config.fill_size * sizeof(__u64), -		   PROT_READ | PROT_WRITE, MAP_SHARED | MAP_POPULATE, umem->fd, -		   XDP_UMEM_PGOFF_FILL_RING); -	if (map == MAP_FAILED) { -		err = -errno; +	err = xsk_create_umem_rings(umem, umem->fd, fill, comp); +	if (err)  		goto out_socket; -	} - -	umem->fill = fill; -	fill->mask = umem->config.fill_size - 1; -	fill->size = umem->config.fill_size; -	fill->producer = map + off.fr.producer; -	fill->consumer = map + off.fr.consumer; -	fill->flags = map + off.fr.flags; -	fill->ring = map + off.fr.desc; -	fill->cached_prod = *fill->producer; -	/* cached_cons is "size" bigger than the real consumer pointer -	 * See xsk_prod_nb_free -	 */ -	fill->cached_cons = *fill->consumer + umem->config.fill_size; - -	map = mmap(NULL, off.cr.desc + umem->config.comp_size * sizeof(__u64), -		   PROT_READ | PROT_WRITE, MAP_SHARED | MAP_POPULATE, umem->fd, -		   XDP_UMEM_PGOFF_COMPLETION_RING); -	if (map == MAP_FAILED) { -		err = -errno; -		goto out_mmap; -	} - -	umem->comp = comp; -	comp->mask = umem->config.comp_size - 1; -	comp->size = umem->config.comp_size; -	comp->producer = map + off.cr.producer; -	comp->consumer = map + off.cr.consumer; -	comp->flags = map + off.cr.flags; -	comp->ring = map + off.cr.desc; -	comp->cached_prod = *comp->producer; -	comp->cached_cons = *comp->consumer; +	umem->fill_save = fill; +	umem->comp_save = comp;  	*umem_ptr = umem;  	return 0; -out_mmap: -	munmap(map, off.fr.desc + umem->config.fill_size * sizeof(__u64));  out_socket:  	close(umem->fd);  out_umem_alloc: @@ -342,6 +354,7 @@ DEFAULT_VERSION(xsk_umem__create_v0_0_4, xsk_umem__create, LIBBPF_0.0.4)  static int xsk_load_xdp_prog(struct xsk_socket *xsk)  {  	static const int log_buf_size = 16 * 1024; +	struct xsk_ctx *ctx = xsk->ctx;  	char log_buf[log_buf_size];  	int err, prog_fd; @@ -369,7 +382,7 @@ static int xsk_load_xdp_prog(struct xsk_socket *xsk)  		/* 
*(u32 *)(r10 - 4) = r2 */  		BPF_STX_MEM(BPF_W, BPF_REG_10, BPF_REG_2, -4),  		/* r1 = xskmap[] */ -		BPF_LD_MAP_FD(BPF_REG_1, xsk->xsks_map_fd), +		BPF_LD_MAP_FD(BPF_REG_1, ctx->xsks_map_fd),  		/* r3 = XDP_PASS */  		BPF_MOV64_IMM(BPF_REG_3, 2),  		/* call bpf_redirect_map */ @@ -381,7 +394,7 @@ static int xsk_load_xdp_prog(struct xsk_socket *xsk)  		/* r2 += -4 */  		BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -4),  		/* r1 = xskmap[] */ -		BPF_LD_MAP_FD(BPF_REG_1, xsk->xsks_map_fd), +		BPF_LD_MAP_FD(BPF_REG_1, ctx->xsks_map_fd),  		/* call bpf_map_lookup_elem */  		BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),  		/* r1 = r0 */ @@ -393,7 +406,7 @@ static int xsk_load_xdp_prog(struct xsk_socket *xsk)  		/* r2 = *(u32 *)(r10 - 4) */  		BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_10, -4),  		/* r1 = xskmap[] */ -		BPF_LD_MAP_FD(BPF_REG_1, xsk->xsks_map_fd), +		BPF_LD_MAP_FD(BPF_REG_1, ctx->xsks_map_fd),  		/* r3 = 0 */  		BPF_MOV64_IMM(BPF_REG_3, 0),  		/* call bpf_redirect_map */ @@ -411,19 +424,21 @@ static int xsk_load_xdp_prog(struct xsk_socket *xsk)  		return prog_fd;  	} -	err = bpf_set_link_xdp_fd(xsk->ifindex, prog_fd, xsk->config.xdp_flags); +	err = bpf_set_link_xdp_fd(xsk->ctx->ifindex, prog_fd, +				  xsk->config.xdp_flags);  	if (err) {  		close(prog_fd);  		return err;  	} -	xsk->prog_fd = prog_fd; +	ctx->prog_fd = prog_fd;  	return 0;  }  static int xsk_get_max_queues(struct xsk_socket *xsk)  {  	struct ethtool_channels channels = { .cmd = ETHTOOL_GCHANNELS }; +	struct xsk_ctx *ctx = xsk->ctx;  	struct ifreq ifr = {};  	int fd, err, ret; @@ -432,7 +447,7 @@ static int xsk_get_max_queues(struct xsk_socket *xsk)  		return -errno;  	ifr.ifr_data = (void *)&channels; -	memcpy(ifr.ifr_name, xsk->ifname, IFNAMSIZ - 1); +	memcpy(ifr.ifr_name, ctx->ifname, IFNAMSIZ - 1);  	ifr.ifr_name[IFNAMSIZ - 1] = '\0';  	err = ioctl(fd, SIOCETHTOOL, &ifr);  	if (err && errno != EOPNOTSUPP) { @@ -460,6 +475,7 @@ out:  static int xsk_create_bpf_maps(struct xsk_socket *xsk)  { +	struct xsk_ctx *ctx = xsk->ctx;  	int max_queues;  	int fd; @@ -472,15 +488,17 @@ static int xsk_create_bpf_maps(struct xsk_socket *xsk)  	if (fd < 0)  		return fd; -	xsk->xsks_map_fd = fd; +	ctx->xsks_map_fd = fd;  	return 0;  }  static void xsk_delete_bpf_maps(struct xsk_socket *xsk)  { -	bpf_map_delete_elem(xsk->xsks_map_fd, &xsk->queue_id); -	close(xsk->xsks_map_fd); +	struct xsk_ctx *ctx = xsk->ctx; + +	bpf_map_delete_elem(ctx->xsks_map_fd, &ctx->queue_id); +	close(ctx->xsks_map_fd);  }  static int xsk_lookup_bpf_maps(struct xsk_socket *xsk) @@ -488,10 +506,11 @@ static int xsk_lookup_bpf_maps(struct xsk_socket *xsk)  	__u32 i, *map_ids, num_maps, prog_len = sizeof(struct bpf_prog_info);  	__u32 map_len = sizeof(struct bpf_map_info);  	struct bpf_prog_info prog_info = {}; +	struct xsk_ctx *ctx = xsk->ctx;  	struct bpf_map_info map_info;  	int fd, err; -	err = bpf_obj_get_info_by_fd(xsk->prog_fd, &prog_info, &prog_len); +	err = bpf_obj_get_info_by_fd(ctx->prog_fd, &prog_info, &prog_len);  	if (err)  		return err; @@ -505,11 +524,11 @@ static int xsk_lookup_bpf_maps(struct xsk_socket *xsk)  	prog_info.nr_map_ids = num_maps;  	prog_info.map_ids = (__u64)(unsigned long)map_ids; -	err = bpf_obj_get_info_by_fd(xsk->prog_fd, &prog_info, &prog_len); +	err = bpf_obj_get_info_by_fd(ctx->prog_fd, &prog_info, &prog_len);  	if (err)  		goto out_map_ids; -	xsk->xsks_map_fd = -1; +	ctx->xsks_map_fd = -1;  	for (i = 0; i < prog_info.nr_map_ids; i++) {  		fd = bpf_map_get_fd_by_id(map_ids[i]); @@ -523,7 +542,7 @@ static int xsk_lookup_bpf_maps(struct 
xsk_socket *xsk)  		}  		if (!strcmp(map_info.name, "xsks_map")) { -			xsk->xsks_map_fd = fd; +			ctx->xsks_map_fd = fd;  			continue;  		} @@ -531,7 +550,7 @@ static int xsk_lookup_bpf_maps(struct xsk_socket *xsk)  	}  	err = 0; -	if (xsk->xsks_map_fd == -1) +	if (ctx->xsks_map_fd == -1)  		err = -ENOENT;  out_map_ids: @@ -541,16 +560,19 @@ out_map_ids:  static int xsk_set_bpf_maps(struct xsk_socket *xsk)  { -	return bpf_map_update_elem(xsk->xsks_map_fd, &xsk->queue_id, +	struct xsk_ctx *ctx = xsk->ctx; + +	return bpf_map_update_elem(ctx->xsks_map_fd, &ctx->queue_id,  				   &xsk->fd, 0);  }  static int xsk_setup_xdp_prog(struct xsk_socket *xsk)  { +	struct xsk_ctx *ctx = xsk->ctx;  	__u32 prog_id = 0;  	int err; -	err = bpf_get_link_xdp_id(xsk->ifindex, &prog_id, +	err = bpf_get_link_xdp_id(ctx->ifindex, &prog_id,  				  xsk->config.xdp_flags);  	if (err)  		return err; @@ -566,12 +588,12 @@ static int xsk_setup_xdp_prog(struct xsk_socket *xsk)  			return err;  		}  	} else { -		xsk->prog_fd = bpf_prog_get_fd_by_id(prog_id); -		if (xsk->prog_fd < 0) +		ctx->prog_fd = bpf_prog_get_fd_by_id(prog_id); +		if (ctx->prog_fd < 0)  			return -errno;  		err = xsk_lookup_bpf_maps(xsk);  		if (err) { -			close(xsk->prog_fd); +			close(ctx->prog_fd);  			return err;  		}  	} @@ -580,23 +602,108 @@ static int xsk_setup_xdp_prog(struct xsk_socket *xsk)  		err = xsk_set_bpf_maps(xsk);  	if (err) {  		xsk_delete_bpf_maps(xsk); -		close(xsk->prog_fd); +		close(ctx->prog_fd);  		return err;  	}  	return 0;  } -int xsk_socket__create(struct xsk_socket **xsk_ptr, const char *ifname, -		       __u32 queue_id, struct xsk_umem *umem, -		       struct xsk_ring_cons *rx, struct xsk_ring_prod *tx, -		       const struct xsk_socket_config *usr_config) +static struct xsk_ctx *xsk_get_ctx(struct xsk_umem *umem, int ifindex, +				   __u32 queue_id) +{ +	struct xsk_ctx *ctx; + +	if (list_empty(&umem->ctx_list)) +		return NULL; + +	list_for_each_entry(ctx, &umem->ctx_list, list) { +		if (ctx->ifindex == ifindex && ctx->queue_id == queue_id) { +			ctx->refcount++; +			return ctx; +		} +	} + +	return NULL; +} + +static void xsk_put_ctx(struct xsk_ctx *ctx) +{ +	struct xsk_umem *umem = ctx->umem; +	struct xdp_mmap_offsets off; +	int err; + +	if (--ctx->refcount == 0) { +		err = xsk_get_mmap_offsets(umem->fd, &off); +		if (!err) { +			munmap(ctx->fill->ring - off.fr.desc, +			       off.fr.desc + umem->config.fill_size * +			       sizeof(__u64)); +			munmap(ctx->comp->ring - off.cr.desc, +			       off.cr.desc + umem->config.comp_size * +			       sizeof(__u64)); +		} + +		list_del(&ctx->list); +		free(ctx); +	} +} + +static struct xsk_ctx *xsk_create_ctx(struct xsk_socket *xsk, +				      struct xsk_umem *umem, int ifindex, +				      const char *ifname, __u32 queue_id, +				      struct xsk_ring_prod *fill, +				      struct xsk_ring_cons *comp) +{ +	struct xsk_ctx *ctx; +	int err; + +	ctx = calloc(1, sizeof(*ctx)); +	if (!ctx) +		return NULL; + +	if (!umem->fill_save) { +		err = xsk_create_umem_rings(umem, xsk->fd, fill, comp); +		if (err) { +			free(ctx); +			return NULL; +		} +	} else if (umem->fill_save != fill || umem->comp_save != comp) { +		/* Copy over rings to new structs. 
*/ +		memcpy(fill, umem->fill_save, sizeof(*fill)); +		memcpy(comp, umem->comp_save, sizeof(*comp)); +	} + +	ctx->ifindex = ifindex; +	ctx->refcount = 1; +	ctx->umem = umem; +	ctx->queue_id = queue_id; +	memcpy(ctx->ifname, ifname, IFNAMSIZ - 1); +	ctx->ifname[IFNAMSIZ - 1] = '\0'; + +	umem->fill_save = NULL; +	umem->comp_save = NULL; +	ctx->fill = fill; +	ctx->comp = comp; +	list_add(&ctx->list, &umem->ctx_list); +	return ctx; +} + +int xsk_socket__create_shared(struct xsk_socket **xsk_ptr, +			      const char *ifname, +			      __u32 queue_id, struct xsk_umem *umem, +			      struct xsk_ring_cons *rx, +			      struct xsk_ring_prod *tx, +			      struct xsk_ring_prod *fill, +			      struct xsk_ring_cons *comp, +			      const struct xsk_socket_config *usr_config)  {  	void *rx_map = NULL, *tx_map = NULL;  	struct sockaddr_xdp sxdp = {};  	struct xdp_mmap_offsets off;  	struct xsk_socket *xsk; -	int err; +	struct xsk_ctx *ctx; +	int err, ifindex;  	if (!umem || !xsk_ptr || !(rx || tx))  		return -EFAULT; @@ -609,10 +716,10 @@ int xsk_socket__create(struct xsk_socket **xsk_ptr, const char *ifname,  	if (err)  		goto out_xsk_alloc; -	if (umem->refcount && -	    !(xsk->config.libbpf_flags & XSK_LIBBPF_FLAGS__INHIBIT_PROG_LOAD)) { -		pr_warn("Error: shared umems not supported by libbpf supplied XDP program.\n"); -		err = -EBUSY; +	xsk->outstanding_tx = 0; +	ifindex = if_nametoindex(ifname); +	if (!ifindex) { +		err = -errno;  		goto out_xsk_alloc;  	} @@ -626,16 +733,21 @@ int xsk_socket__create(struct xsk_socket **xsk_ptr, const char *ifname,  		xsk->fd = umem->fd;  	} -	xsk->outstanding_tx = 0; -	xsk->queue_id = queue_id; -	xsk->umem = umem; -	xsk->ifindex = if_nametoindex(ifname); -	if (!xsk->ifindex) { -		err = -errno; -		goto out_socket; +	ctx = xsk_get_ctx(umem, ifindex, queue_id); +	if (!ctx) { +		if (!fill || !comp) { +			err = -EFAULT; +			goto out_socket; +		} + +		ctx = xsk_create_ctx(xsk, umem, ifindex, ifname, queue_id, +				     fill, comp); +		if (!ctx) { +			err = -ENOMEM; +			goto out_socket; +		}  	} -	memcpy(xsk->ifname, ifname, IFNAMSIZ - 1); -	xsk->ifname[IFNAMSIZ - 1] = '\0'; +	xsk->ctx = ctx;  	if (rx) {  		err = setsockopt(xsk->fd, SOL_XDP, XDP_RX_RING, @@ -643,7 +755,7 @@ int xsk_socket__create(struct xsk_socket **xsk_ptr, const char *ifname,  				 sizeof(xsk->config.rx_size));  		if (err) {  			err = -errno; -			goto out_socket; +			goto out_put_ctx;  		}  	}  	if (tx) { @@ -652,14 +764,14 @@ int xsk_socket__create(struct xsk_socket **xsk_ptr, const char *ifname,  				 sizeof(xsk->config.tx_size));  		if (err) {  			err = -errno; -			goto out_socket; +			goto out_put_ctx;  		}  	}  	err = xsk_get_mmap_offsets(xsk->fd, &off);  	if (err) {  		err = -errno; -		goto out_socket; +		goto out_put_ctx;  	}  	if (rx) { @@ -669,7 +781,7 @@ int xsk_socket__create(struct xsk_socket **xsk_ptr, const char *ifname,  			      xsk->fd, XDP_PGOFF_RX_RING);  		if (rx_map == MAP_FAILED) {  			err = -errno; -			goto out_socket; +			goto out_put_ctx;  		}  		rx->mask = xsk->config.rx_size - 1; @@ -708,10 +820,10 @@ int xsk_socket__create(struct xsk_socket **xsk_ptr, const char *ifname,  	xsk->tx = tx;  	sxdp.sxdp_family = PF_XDP; -	sxdp.sxdp_ifindex = xsk->ifindex; -	sxdp.sxdp_queue_id = xsk->queue_id; +	sxdp.sxdp_ifindex = ctx->ifindex; +	sxdp.sxdp_queue_id = ctx->queue_id;  	if (umem->refcount > 1) { -		sxdp.sxdp_flags = XDP_SHARED_UMEM; +		sxdp.sxdp_flags |= XDP_SHARED_UMEM;  		sxdp.sxdp_shared_umem_fd = umem->fd;  	} else {  		sxdp.sxdp_flags = xsk->config.bind_flags; @@ -723,7 
+835,7 @@ int xsk_socket__create(struct xsk_socket **xsk_ptr, const char *ifname,  		goto out_mmap_tx;  	} -	xsk->prog_fd = -1; +	ctx->prog_fd = -1;  	if (!(xsk->config.libbpf_flags & XSK_LIBBPF_FLAGS__INHIBIT_PROG_LOAD)) {  		err = xsk_setup_xdp_prog(xsk); @@ -742,6 +854,8 @@ out_mmap_rx:  	if (rx)  		munmap(rx_map, off.rx.desc +  		       xsk->config.rx_size * sizeof(struct xdp_desc)); +out_put_ctx: +	xsk_put_ctx(ctx);  out_socket:  	if (--umem->refcount)  		close(xsk->fd); @@ -750,25 +864,24 @@ out_xsk_alloc:  	return err;  } -int xsk_umem__delete(struct xsk_umem *umem) +int xsk_socket__create(struct xsk_socket **xsk_ptr, const char *ifname, +		       __u32 queue_id, struct xsk_umem *umem, +		       struct xsk_ring_cons *rx, struct xsk_ring_prod *tx, +		       const struct xsk_socket_config *usr_config)  { -	struct xdp_mmap_offsets off; -	int err; +	return xsk_socket__create_shared(xsk_ptr, ifname, queue_id, umem, +					 rx, tx, umem->fill_save, +					 umem->comp_save, usr_config); +} +int xsk_umem__delete(struct xsk_umem *umem) +{  	if (!umem)  		return 0;  	if (umem->refcount)  		return -EBUSY; -	err = xsk_get_mmap_offsets(umem->fd, &off); -	if (!err) { -		munmap(umem->fill->ring - off.fr.desc, -		       off.fr.desc + umem->config.fill_size * sizeof(__u64)); -		munmap(umem->comp->ring - off.cr.desc, -		       off.cr.desc + umem->config.comp_size * sizeof(__u64)); -	} -  	close(umem->fd);  	free(umem); @@ -778,15 +891,16 @@ int xsk_umem__delete(struct xsk_umem *umem)  void xsk_socket__delete(struct xsk_socket *xsk)  {  	size_t desc_sz = sizeof(struct xdp_desc); +	struct xsk_ctx *ctx = xsk->ctx;  	struct xdp_mmap_offsets off;  	int err;  	if (!xsk)  		return; -	if (xsk->prog_fd != -1) { +	if (ctx->prog_fd != -1) {  		xsk_delete_bpf_maps(xsk); -		close(xsk->prog_fd); +		close(ctx->prog_fd);  	}  	err = xsk_get_mmap_offsets(xsk->fd, &off); @@ -799,14 +913,15 @@ void xsk_socket__delete(struct xsk_socket *xsk)  			munmap(xsk->tx->ring - off.tx.desc,  			       off.tx.desc + xsk->config.tx_size * desc_sz);  		} -  	} -	xsk->umem->refcount--; +	xsk_put_ctx(ctx); + +	ctx->umem->refcount--;  	/* Do not close an fd that also has an associated umem connected  	 * to it.  	 */ -	if (xsk->fd != xsk->umem->fd) +	if (xsk->fd != ctx->umem->fd)  		close(xsk->fd);  	free(xsk);  } diff --git a/tools/lib/bpf/xsk.h b/tools/lib/bpf/xsk.h index 584f6820a639..1069c46364ff 100644 --- a/tools/lib/bpf/xsk.h +++ b/tools/lib/bpf/xsk.h @@ -234,6 +234,15 @@ LIBBPF_API int xsk_socket__create(struct xsk_socket **xsk,  				  struct xsk_ring_cons *rx,  				  struct xsk_ring_prod *tx,  				  const struct xsk_socket_config *config); +LIBBPF_API int +xsk_socket__create_shared(struct xsk_socket **xsk_ptr, +			  const char *ifname, +			  __u32 queue_id, struct xsk_umem *umem, +			  struct xsk_ring_cons *rx, +			  struct xsk_ring_prod *tx, +			  struct xsk_ring_prod *fill, +			  struct xsk_ring_cons *comp, +			  const struct xsk_socket_config *config);  /* Returns 0 for success and -EBUSY if the umem is still in use. */  LIBBPF_API int xsk_umem__delete(struct xsk_umem *umem);  | 
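Two minimal usage sketches for APIs added in this diff follow; everything outside the libbpf calls themselves (interface names, queue ids, page counts, the map FD) is illustrative, not taken from the commit.

First, the new FD-level perf_buffer API (perf_buffer__epoll_fd(), perf_buffer__buffer_cnt(), perf_buffer__consume_buffer()), assuming map_fd refers to a BPF_MAP_TYPE_PERF_EVENT_ARRAY map set up elsewhere:

#include <sys/epoll.h>
#include <bpf/libbpf.h>

static void on_sample(void *ctx, int cpu, void *data, __u32 size)
{
	/* consume one record; the payload layout is defined by the BPF
	 * program that called bpf_perf_event_output() */
}

static int drain_once(int map_fd)
{
	struct perf_buffer_opts pb_opts = { .sample_cb = on_sample };
	struct perf_buffer *pb;
	struct epoll_event ev;
	size_t i;
	int err = 0;

	pb = perf_buffer__new(map_fd, 8 /* pages per ring */, &pb_opts);
	if (libbpf_get_error(pb))
		return -1;

	/* plug the manager into a caller-owned event loop instead of
	 * blocking in perf_buffer__poll() */
	if (epoll_wait(perf_buffer__epoll_fd(pb), &ev, 1, 1000 /* ms */) > 0) {
		/* drain every ring slot without further waiting; a slot
		 * could also be polled individually via
		 * perf_buffer__buffer_fd() */
		for (i = 0; i < perf_buffer__buffer_cnt(pb); i++) {
			err = perf_buffer__consume_buffer(pb, i);
			if (err)
				break;
		}
	}

	perf_buffer__free(pb);
	return err;
}

Second, xsk_socket__create_shared(): two sockets on different queues of one device sharing a single umem, with the second socket supplying its own fill/completion rings. "eth0", the queue ids and the omitted teardown are assumptions for illustration; the rings are static because libbpf keeps pointers to them after setup:

#include <bpf/xsk.h>

static struct xsk_ring_prod fq0, fq1, tx0, tx1;
static struct xsk_ring_cons cq0, cq1, rx0, rx1;

static int setup_two_queues(void *umem_area, __u64 size)
{
	struct xsk_socket *xsk0, *xsk1;
	struct xsk_umem *umem;
	int err;

	err = xsk_umem__create(&umem, umem_area, size, &fq0, &cq0, NULL);
	if (err)
		return err;

	/* the first socket keeps using the fill/completion rings that
	 * were registered with the umem above */
	err = xsk_socket__create(&xsk0, "eth0", 0, umem, &rx0, &tx0, NULL);
	if (err)
		return err;

	/* any further socket sharing the umem must bring its own rings */
	return xsk_socket__create_shared(&xsk1, "eth0", 1, umem,
					 &rx1, &tx1, &fq1, &cq1, NULL);
}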