diff options
Diffstat (limited to 'tools')
321 files changed, 12636 insertions, 2440 deletions
diff --git a/tools/arch/mips/include/uapi/asm/perf_regs.h b/tools/arch/mips/include/uapi/asm/perf_regs.h new file mode 100644 index 000000000000..d0f4ecd616cf --- /dev/null +++ b/tools/arch/mips/include/uapi/asm/perf_regs.h @@ -0,0 +1,40 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ +#ifndef _ASM_MIPS_PERF_REGS_H +#define _ASM_MIPS_PERF_REGS_H + +enum perf_event_mips_regs { + PERF_REG_MIPS_PC, + PERF_REG_MIPS_R1, + PERF_REG_MIPS_R2, + PERF_REG_MIPS_R3, + PERF_REG_MIPS_R4, + PERF_REG_MIPS_R5, + PERF_REG_MIPS_R6, + PERF_REG_MIPS_R7, + PERF_REG_MIPS_R8, + PERF_REG_MIPS_R9, + PERF_REG_MIPS_R10, + PERF_REG_MIPS_R11, + PERF_REG_MIPS_R12, + PERF_REG_MIPS_R13, + PERF_REG_MIPS_R14, + PERF_REG_MIPS_R15, + PERF_REG_MIPS_R16, + PERF_REG_MIPS_R17, + PERF_REG_MIPS_R18, + PERF_REG_MIPS_R19, + PERF_REG_MIPS_R20, + PERF_REG_MIPS_R21, + PERF_REG_MIPS_R22, + PERF_REG_MIPS_R23, + PERF_REG_MIPS_R24, + PERF_REG_MIPS_R25, + PERF_REG_MIPS_R26, + PERF_REG_MIPS_R27, + PERF_REG_MIPS_R28, + PERF_REG_MIPS_R29, + PERF_REG_MIPS_R30, + PERF_REG_MIPS_R31, + PERF_REG_MIPS_MAX = PERF_REG_MIPS_R31 + 1, +}; +#endif /* _ASM_MIPS_PERF_REGS_H */ diff --git a/tools/arch/powerpc/include/uapi/asm/errno.h b/tools/arch/powerpc/include/uapi/asm/errno.h index cc79856896a1..4ba87de32be0 100644 --- a/tools/arch/powerpc/include/uapi/asm/errno.h +++ b/tools/arch/powerpc/include/uapi/asm/errno.h @@ -2,6 +2,7 @@ #ifndef _ASM_POWERPC_ERRNO_H #define _ASM_POWERPC_ERRNO_H +#undef EDEADLOCK #include <asm-generic/errno.h> #undef EDEADLOCK diff --git a/tools/arch/x86/include/asm/cpufeatures.h b/tools/arch/x86/include/asm/cpufeatures.h index cc96e26d69f7..ac37830ae941 100644 --- a/tools/arch/x86/include/asm/cpufeatures.h +++ b/tools/arch/x86/include/asm/cpufeatures.h @@ -84,7 +84,7 @@ /* CPU types for specific tunings: */ #define X86_FEATURE_K8 ( 3*32+ 4) /* "" Opteron, Athlon64 */ -#define X86_FEATURE_K7 ( 3*32+ 5) /* "" Athlon */ +/* FREE, was #define X86_FEATURE_K7 ( 3*32+ 5) "" Athlon */ #define X86_FEATURE_P3 ( 3*32+ 6) /* "" P3 */ #define X86_FEATURE_P4 ( 3*32+ 7) /* "" P4 */ #define X86_FEATURE_CONSTANT_TSC ( 3*32+ 8) /* TSC ticks at a constant rate */ @@ -236,6 +236,8 @@ #define X86_FEATURE_EPT_AD ( 8*32+17) /* Intel Extended Page Table access-dirty bit */ #define X86_FEATURE_VMCALL ( 8*32+18) /* "" Hypervisor supports the VMCALL instruction */ #define X86_FEATURE_VMW_VMMCALL ( 8*32+19) /* "" VMware prefers VMMCALL hypercall instruction */ +#define X86_FEATURE_PVUNLOCK ( 8*32+20) /* "" PV unlock function */ +#define X86_FEATURE_VCPUPREEMPT ( 8*32+21) /* "" PV vcpu_is_preempted function */ /* Intel-defined CPU features, CPUID level 0x00000007:0 (EBX), word 9 */ #define X86_FEATURE_FSGSBASE ( 9*32+ 0) /* RDFSBASE, WRFSBASE, RDGSBASE, WRGSBASE instructions*/ @@ -290,6 +292,8 @@ #define X86_FEATURE_FENCE_SWAPGS_KERNEL (11*32+ 5) /* "" LFENCE in kernel entry SWAPGS path */ #define X86_FEATURE_SPLIT_LOCK_DETECT (11*32+ 6) /* #AC for split lock */ #define X86_FEATURE_PER_THREAD_MBA (11*32+ 7) /* "" Per-thread Memory Bandwidth Allocation */ +#define X86_FEATURE_SGX1 (11*32+ 8) /* "" Basic SGX */ +#define X86_FEATURE_SGX2 (11*32+ 9) /* "" SGX Enclave Dynamic Memory Management (EDMM) */ /* Intel-defined CPU features, CPUID level 0x00000007:1 (EAX), word 12 */ #define X86_FEATURE_AVX_VNNI (12*32+ 4) /* AVX VNNI instructions */ @@ -336,6 +340,7 @@ #define X86_FEATURE_AVIC (15*32+13) /* Virtual Interrupt Controller */ #define X86_FEATURE_V_VMSAVE_VMLOAD (15*32+15) /* Virtual VMSAVE VMLOAD */ #define X86_FEATURE_VGIF (15*32+16) /* Virtual GIF */ +#define X86_FEATURE_V_SPEC_CTRL (15*32+20) /* Virtual SPEC_CTRL */ #define X86_FEATURE_SVME_ADDR_CHK (15*32+28) /* "" SVME addr check */ /* Intel-defined CPU features, CPUID level 0x00000007:0 (ECX), word 16 */ @@ -354,6 +359,7 @@ #define X86_FEATURE_AVX512_VPOPCNTDQ (16*32+14) /* POPCNT for vectors of DW/QW */ #define X86_FEATURE_LA57 (16*32+16) /* 5-level page tables */ #define X86_FEATURE_RDPID (16*32+22) /* RDPID instruction */ +#define X86_FEATURE_BUS_LOCK_DETECT (16*32+24) /* Bus Lock detect */ #define X86_FEATURE_CLDEMOTE (16*32+25) /* CLDEMOTE instruction */ #define X86_FEATURE_MOVDIRI (16*32+27) /* MOVDIRI instruction */ #define X86_FEATURE_MOVDIR64B (16*32+28) /* MOVDIR64B instruction */ @@ -374,6 +380,7 @@ #define X86_FEATURE_MD_CLEAR (18*32+10) /* VERW clears CPU buffers */ #define X86_FEATURE_TSX_FORCE_ABORT (18*32+13) /* "" TSX_FORCE_ABORT */ #define X86_FEATURE_SERIALIZE (18*32+14) /* SERIALIZE instruction */ +#define X86_FEATURE_HYBRID_CPU (18*32+15) /* "" This part has CPUs of more than one type */ #define X86_FEATURE_TSXLDTRK (18*32+16) /* TSX Suspend Load Address Tracking */ #define X86_FEATURE_PCONFIG (18*32+18) /* Intel PCONFIG */ #define X86_FEATURE_ARCH_LBR (18*32+19) /* Intel ARCH LBR */ diff --git a/tools/arch/x86/include/asm/disabled-features.h b/tools/arch/x86/include/asm/disabled-features.h index b7dd944dc867..8f28fafa98b3 100644 --- a/tools/arch/x86/include/asm/disabled-features.h +++ b/tools/arch/x86/include/asm/disabled-features.h @@ -56,11 +56,8 @@ # define DISABLE_PTI (1 << (X86_FEATURE_PTI & 31)) #endif -#ifdef CONFIG_IOMMU_SUPPORT -# define DISABLE_ENQCMD 0 -#else -# define DISABLE_ENQCMD (1 << (X86_FEATURE_ENQCMD & 31)) -#endif +/* Force disable because it's broken beyond repair */ +#define DISABLE_ENQCMD (1 << (X86_FEATURE_ENQCMD & 31)) #ifdef CONFIG_X86_SGX # define DISABLE_SGX 0 diff --git a/tools/arch/x86/include/asm/msr-index.h b/tools/arch/x86/include/asm/msr-index.h index 45029354e0a8..211ba3375ee9 100644 --- a/tools/arch/x86/include/asm/msr-index.h +++ b/tools/arch/x86/include/asm/msr-index.h @@ -185,6 +185,9 @@ #define MSR_PEBS_DATA_CFG 0x000003f2 #define MSR_IA32_DS_AREA 0x00000600 #define MSR_IA32_PERF_CAPABILITIES 0x00000345 +#define PERF_CAP_METRICS_IDX 15 +#define PERF_CAP_PT_IDX 16 + #define MSR_PEBS_LD_LAT_THRESHOLD 0x000003f6 #define MSR_IA32_RTIT_CTL 0x00000570 @@ -265,6 +268,7 @@ #define DEBUGCTLMSR_LBR (1UL << 0) /* last branch recording */ #define DEBUGCTLMSR_BTF_SHIFT 1 #define DEBUGCTLMSR_BTF (1UL << 1) /* single-step on branches */ +#define DEBUGCTLMSR_BUS_LOCK_DETECT (1UL << 2) #define DEBUGCTLMSR_TR (1UL << 6) #define DEBUGCTLMSR_BTS (1UL << 7) #define DEBUGCTLMSR_BTINT (1UL << 8) @@ -533,9 +537,9 @@ /* K8 MSRs */ #define MSR_K8_TOP_MEM1 0xc001001a #define MSR_K8_TOP_MEM2 0xc001001d -#define MSR_K8_SYSCFG 0xc0010010 -#define MSR_K8_SYSCFG_MEM_ENCRYPT_BIT 23 -#define MSR_K8_SYSCFG_MEM_ENCRYPT BIT_ULL(MSR_K8_SYSCFG_MEM_ENCRYPT_BIT) +#define MSR_AMD64_SYSCFG 0xc0010010 +#define MSR_AMD64_SYSCFG_MEM_ENCRYPT_BIT 23 +#define MSR_AMD64_SYSCFG_MEM_ENCRYPT BIT_ULL(MSR_AMD64_SYSCFG_MEM_ENCRYPT_BIT) #define MSR_K8_INT_PENDING_MSG 0xc0010055 /* C1E active bits in int pending message */ #define K8_INTP_C1E_ACTIVE_MASK 0x18000000 diff --git a/tools/arch/x86/include/uapi/asm/kvm.h b/tools/arch/x86/include/uapi/asm/kvm.h index 5a3022c8af82..0662f644aad9 100644 --- a/tools/arch/x86/include/uapi/asm/kvm.h +++ b/tools/arch/x86/include/uapi/asm/kvm.h @@ -437,6 +437,8 @@ struct kvm_vmx_nested_state_hdr { __u16 flags; } smm; + __u16 pad; + __u32 flags; __u64 preemption_timer_deadline; }; diff --git a/tools/arch/x86/include/uapi/asm/vmx.h b/tools/arch/x86/include/uapi/asm/vmx.h index b8e650a985e3..946d761adbd3 100644 --- a/tools/arch/x86/include/uapi/asm/vmx.h +++ b/tools/arch/x86/include/uapi/asm/vmx.h @@ -27,6 +27,7 @@ #define VMX_EXIT_REASONS_FAILED_VMENTRY 0x80000000 +#define VMX_EXIT_REASONS_SGX_ENCLAVE_MODE 0x08000000 #define EXIT_REASON_EXCEPTION_NMI 0 #define EXIT_REASON_EXTERNAL_INTERRUPT 1 diff --git a/tools/arch/x86/lib/memcpy_64.S b/tools/arch/x86/lib/memcpy_64.S index 1e299ac73c86..1cc9da6e29c7 100644 --- a/tools/arch/x86/lib/memcpy_64.S +++ b/tools/arch/x86/lib/memcpy_64.S @@ -4,7 +4,7 @@ #include <linux/linkage.h> #include <asm/errno.h> #include <asm/cpufeatures.h> -#include <asm/alternative-asm.h> +#include <asm/alternative.h> #include <asm/export.h> .pushsection .noinstr.text, "ax" diff --git a/tools/arch/x86/lib/memset_64.S b/tools/arch/x86/lib/memset_64.S index 0bfd26e4ca9e..9827ae267f96 100644 --- a/tools/arch/x86/lib/memset_64.S +++ b/tools/arch/x86/lib/memset_64.S @@ -3,7 +3,7 @@ #include <linux/linkage.h> #include <asm/cpufeatures.h> -#include <asm/alternative-asm.h> +#include <asm/alternative.h> #include <asm/export.h> /* diff --git a/tools/bootconfig/include/linux/bootconfig.h b/tools/bootconfig/include/linux/bootconfig.h index 078cbd2ba651..de7f30f99af3 100644 --- a/tools/bootconfig/include/linux/bootconfig.h +++ b/tools/bootconfig/include/linux/bootconfig.h @@ -4,4 +4,8 @@ #include "../../../../include/linux/bootconfig.h" +#ifndef fallthrough +# define fallthrough +#endif + #endif diff --git a/tools/bootconfig/main.c b/tools/bootconfig/main.c index 7362bef1a368..6cd6080cac04 100644 --- a/tools/bootconfig/main.c +++ b/tools/bootconfig/main.c @@ -399,6 +399,7 @@ static int apply_xbc(const char *path, const char *xbc_path) } /* TODO: Ensure the @path is initramfs/initrd image */ if (fstat(fd, &stat) < 0) { + ret = -errno; pr_err("Failed to get the size of %s\n", path); goto out; } diff --git a/tools/bpf/bpftool/Documentation/bpftool-cgroup.rst b/tools/bpf/bpftool/Documentation/bpftool-cgroup.rst index 790944c35602..baee8591ac76 100644 --- a/tools/bpf/bpftool/Documentation/bpftool-cgroup.rst +++ b/tools/bpf/bpftool/Documentation/bpftool-cgroup.rst @@ -30,7 +30,8 @@ CGROUP COMMANDS | *ATTACH_TYPE* := { **ingress** | **egress** | **sock_create** | **sock_ops** | **device** | | **bind4** | **bind6** | **post_bind4** | **post_bind6** | **connect4** | **connect6** | | **getpeername4** | **getpeername6** | **getsockname4** | **getsockname6** | **sendmsg4** | -| **sendmsg6** | **recvmsg4** | **recvmsg6** | **sysctl** | **getsockopt** | **setsockopt** } +| **sendmsg6** | **recvmsg4** | **recvmsg6** | **sysctl** | **getsockopt** | **setsockopt** | +| **sock_release** } | *ATTACH_FLAGS* := { **multi** | **override** } DESCRIPTION @@ -106,6 +107,7 @@ DESCRIPTION **getpeername6** call to getpeername(2) for an inet6 socket (since 5.8); **getsockname4** call to getsockname(2) for an inet4 socket (since 5.8); **getsockname6** call to getsockname(2) for an inet6 socket (since 5.8). + **sock_release** closing an userspace inet socket (since 5.9). **bpftool cgroup detach** *CGROUP* *ATTACH_TYPE* *PROG* Detach *PROG* from the cgroup *CGROUP* and attach type diff --git a/tools/bpf/bpftool/Documentation/bpftool-prog.rst b/tools/bpf/bpftool/Documentation/bpftool-prog.rst index 358c7309d419..fe1b38e7e887 100644 --- a/tools/bpf/bpftool/Documentation/bpftool-prog.rst +++ b/tools/bpf/bpftool/Documentation/bpftool-prog.rst @@ -44,7 +44,7 @@ PROG COMMANDS | **cgroup/connect4** | **cgroup/connect6** | **cgroup/getpeername4** | **cgroup/getpeername6** | | **cgroup/getsockname4** | **cgroup/getsockname6** | **cgroup/sendmsg4** | **cgroup/sendmsg6** | | **cgroup/recvmsg4** | **cgroup/recvmsg6** | **cgroup/sysctl** | -| **cgroup/getsockopt** | **cgroup/setsockopt** | +| **cgroup/getsockopt** | **cgroup/setsockopt** | **cgroup/sock_release** | | **struct_ops** | **fentry** | **fexit** | **freplace** | **sk_lookup** | } | *ATTACH_TYPE* := { diff --git a/tools/bpf/bpftool/Makefile b/tools/bpf/bpftool/Makefile index b3073ae84018..d73232be1e99 100644 --- a/tools/bpf/bpftool/Makefile +++ b/tools/bpf/bpftool/Makefile @@ -136,7 +136,7 @@ endif BPFTOOL_BOOTSTRAP := $(BOOTSTRAP_OUTPUT)bpftool -BOOTSTRAP_OBJS = $(addprefix $(BOOTSTRAP_OUTPUT),main.o common.o json_writer.o gen.o btf.o) +BOOTSTRAP_OBJS = $(addprefix $(BOOTSTRAP_OUTPUT),main.o common.o json_writer.o gen.o btf.o xlated_dumper.o btf_dumper.o disasm.o) OBJS = $(patsubst %.c,$(OUTPUT)%.o,$(SRCS)) $(OUTPUT)disasm.o VMLINUX_BTF_PATHS ?= $(if $(O),$(O)/vmlinux) \ @@ -180,6 +180,9 @@ endif CFLAGS += $(if $(BUILD_BPF_SKELS),,-DBPFTOOL_WITHOUT_SKELETONS) +$(BOOTSTRAP_OUTPUT)disasm.o: $(srctree)/kernel/bpf/disasm.c + $(QUIET_CC)$(HOSTCC) $(CFLAGS) -c -MMD -o $@ $< + $(OUTPUT)disasm.o: $(srctree)/kernel/bpf/disasm.c $(QUIET_CC)$(CC) $(CFLAGS) -c -MMD -o $@ $< diff --git a/tools/bpf/bpftool/bash-completion/bpftool b/tools/bpf/bpftool/bash-completion/bpftool index d67518bcbd44..cc33c5824a2f 100644 --- a/tools/bpf/bpftool/bash-completion/bpftool +++ b/tools/bpf/bpftool/bash-completion/bpftool @@ -478,7 +478,7 @@ _bpftool() cgroup/recvmsg4 cgroup/recvmsg6 \ cgroup/post_bind4 cgroup/post_bind6 \ cgroup/sysctl cgroup/getsockopt \ - cgroup/setsockopt struct_ops \ + cgroup/setsockopt cgroup/sock_release struct_ops \ fentry fexit freplace sk_lookup" -- \ "$cur" ) ) return 0 @@ -1021,7 +1021,7 @@ _bpftool() device bind4 bind6 post_bind4 post_bind6 connect4 connect6 \ getpeername4 getpeername6 getsockname4 getsockname6 \ sendmsg4 sendmsg6 recvmsg4 recvmsg6 sysctl getsockopt \ - setsockopt' + setsockopt sock_release' local ATTACH_FLAGS='multi override' local PROG_TYPE='id pinned tag name' case $prev in @@ -1032,7 +1032,7 @@ _bpftool() ingress|egress|sock_create|sock_ops|device|bind4|bind6|\ post_bind4|post_bind6|connect4|connect6|getpeername4|\ getpeername6|getsockname4|getsockname6|sendmsg4|sendmsg6|\ - recvmsg4|recvmsg6|sysctl|getsockopt|setsockopt) + recvmsg4|recvmsg6|sysctl|getsockopt|setsockopt|sock_release) COMPREPLY=( $( compgen -W "$PROG_TYPE" -- \ "$cur" ) ) return 0 diff --git a/tools/bpf/bpftool/cgroup.c b/tools/bpf/bpftool/cgroup.c index d901cc1b904a..6e53b1d393f4 100644 --- a/tools/bpf/bpftool/cgroup.c +++ b/tools/bpf/bpftool/cgroup.c @@ -28,7 +28,8 @@ " connect6 | getpeername4 | getpeername6 |\n" \ " getsockname4 | getsockname6 | sendmsg4 |\n" \ " sendmsg6 | recvmsg4 | recvmsg6 |\n" \ - " sysctl | getsockopt | setsockopt }" + " sysctl | getsockopt | setsockopt |\n" \ + " sock_release }" static unsigned int query_flags; diff --git a/tools/bpf/bpftool/gen.c b/tools/bpf/bpftool/gen.c index 31ade77f5ef8..1d71ff8c52fa 100644 --- a/tools/bpf/bpftool/gen.c +++ b/tools/bpf/bpftool/gen.c @@ -18,6 +18,7 @@ #include <sys/stat.h> #include <sys/mman.h> #include <bpf/btf.h> +#include <bpf/bpf_gen_internal.h> #include "json_writer.h" #include "main.h" @@ -106,8 +107,10 @@ static int codegen_datasec_def(struct bpf_object *obj, if (strcmp(sec_name, ".data") == 0) { sec_ident = "data"; + strip_mods = true; } else if (strcmp(sec_name, ".bss") == 0) { sec_ident = "bss"; + strip_mods = true; } else if (strcmp(sec_name, ".rodata") == 0) { sec_ident = "rodata"; strip_mods = true; @@ -129,6 +132,10 @@ static int codegen_datasec_def(struct bpf_object *obj, int need_off = sec_var->offset, align_off, align; __u32 var_type_id = var->type; + /* static variables are not exposed through BPF skeleton */ + if (btf_var(var)->linkage == BTF_VAR_STATIC) + continue; + if (off > need_off) { p_err("Something is wrong for %s's variable #%d: need offset %d, already at %d.\n", sec_name, i, need_off, off); @@ -268,6 +275,327 @@ static void codegen(const char *template, ...) free(s); } +static void print_hex(const char *data, int data_sz) +{ + int i, len; + + for (i = 0, len = 0; i < data_sz; i++) { + int w = data[i] ? 4 : 2; + + len += w; + if (len > 78) { + printf("\\\n"); + len = w; + } + if (!data[i]) + printf("\\0"); + else + printf("\\x%02x", (unsigned char)data[i]); + } +} + +static size_t bpf_map_mmap_sz(const struct bpf_map *map) +{ + long page_sz = sysconf(_SC_PAGE_SIZE); + size_t map_sz; + + map_sz = (size_t)roundup(bpf_map__value_size(map), 8) * bpf_map__max_entries(map); + map_sz = roundup(map_sz, page_sz); + return map_sz; +} + +static void codegen_attach_detach(struct bpf_object *obj, const char *obj_name) +{ + struct bpf_program *prog; + + bpf_object__for_each_program(prog, obj) { + const char *tp_name; + + codegen("\ + \n\ + \n\ + static inline int \n\ + %1$s__%2$s__attach(struct %1$s *skel) \n\ + { \n\ + int prog_fd = skel->progs.%2$s.prog_fd; \n\ + ", obj_name, bpf_program__name(prog)); + + switch (bpf_program__get_type(prog)) { + case BPF_PROG_TYPE_RAW_TRACEPOINT: + tp_name = strchr(bpf_program__section_name(prog), '/') + 1; + printf("\tint fd = bpf_raw_tracepoint_open(\"%s\", prog_fd);\n", tp_name); + break; + case BPF_PROG_TYPE_TRACING: + printf("\tint fd = bpf_raw_tracepoint_open(NULL, prog_fd);\n"); + break; + default: + printf("\tint fd = ((void)prog_fd, 0); /* auto-attach not supported */\n"); + break; + } + codegen("\ + \n\ + \n\ + if (fd > 0) \n\ + skel->links.%1$s_fd = fd; \n\ + return fd; \n\ + } \n\ + ", bpf_program__name(prog)); + } + + codegen("\ + \n\ + \n\ + static inline int \n\ + %1$s__attach(struct %1$s *skel) \n\ + { \n\ + int ret = 0; \n\ + \n\ + ", obj_name); + + bpf_object__for_each_program(prog, obj) { + codegen("\ + \n\ + ret = ret < 0 ? ret : %1$s__%2$s__attach(skel); \n\ + ", obj_name, bpf_program__name(prog)); + } + + codegen("\ + \n\ + return ret < 0 ? ret : 0; \n\ + } \n\ + \n\ + static inline void \n\ + %1$s__detach(struct %1$s *skel) \n\ + { \n\ + ", obj_name); + + bpf_object__for_each_program(prog, obj) { + codegen("\ + \n\ + skel_closenz(skel->links.%1$s_fd); \n\ + ", bpf_program__name(prog)); + } + + codegen("\ + \n\ + } \n\ + "); +} + +static void codegen_destroy(struct bpf_object *obj, const char *obj_name) +{ + struct bpf_program *prog; + struct bpf_map *map; + + codegen("\ + \n\ + static void \n\ + %1$s__destroy(struct %1$s *skel) \n\ + { \n\ + if (!skel) \n\ + return; \n\ + %1$s__detach(skel); \n\ + ", + obj_name); + + bpf_object__for_each_program(prog, obj) { + codegen("\ + \n\ + skel_closenz(skel->progs.%1$s.prog_fd); \n\ + ", bpf_program__name(prog)); + } + + bpf_object__for_each_map(map, obj) { + const char * ident; + + ident = get_map_ident(map); + if (!ident) + continue; + if (bpf_map__is_internal(map) && + (bpf_map__def(map)->map_flags & BPF_F_MMAPABLE)) + printf("\tmunmap(skel->%1$s, %2$zd);\n", + ident, bpf_map_mmap_sz(map)); + codegen("\ + \n\ + skel_closenz(skel->maps.%1$s.map_fd); \n\ + ", ident); + } + codegen("\ + \n\ + free(skel); \n\ + } \n\ + ", + obj_name); +} + +static int gen_trace(struct bpf_object *obj, const char *obj_name, const char *header_guard) +{ + struct bpf_object_load_attr load_attr = {}; + DECLARE_LIBBPF_OPTS(gen_loader_opts, opts); + struct bpf_map *map; + int err = 0; + + err = bpf_object__gen_loader(obj, &opts); + if (err) + return err; + + load_attr.obj = obj; + if (verifier_logs) + /* log_level1 + log_level2 + stats, but not stable UAPI */ + load_attr.log_level = 1 + 2 + 4; + + err = bpf_object__load_xattr(&load_attr); + if (err) { + p_err("failed to load object file"); + goto out; + } + /* If there was no error during load then gen_loader_opts + * are populated with the loader program. + */ + + /* finish generating 'struct skel' */ + codegen("\ + \n\ + }; \n\ + ", obj_name); + + + codegen_attach_detach(obj, obj_name); + + codegen_destroy(obj, obj_name); + + codegen("\ + \n\ + static inline struct %1$s * \n\ + %1$s__open(void) \n\ + { \n\ + struct %1$s *skel; \n\ + \n\ + skel = calloc(sizeof(*skel), 1); \n\ + if (!skel) \n\ + goto cleanup; \n\ + skel->ctx.sz = (void *)&skel->links - (void *)skel; \n\ + ", + obj_name, opts.data_sz); + bpf_object__for_each_map(map, obj) { + const char *ident; + const void *mmap_data = NULL; + size_t mmap_size = 0; + + ident = get_map_ident(map); + if (!ident) + continue; + + if (!bpf_map__is_internal(map) || + !(bpf_map__def(map)->map_flags & BPF_F_MMAPABLE)) + continue; + + codegen("\ + \n\ + skel->%1$s = \n\ + mmap(NULL, %2$zd, PROT_READ | PROT_WRITE,\n\ + MAP_SHARED | MAP_ANONYMOUS, -1, 0); \n\ + if (skel->%1$s == (void *) -1) \n\ + goto cleanup; \n\ + memcpy(skel->%1$s, (void *)\"\\ \n\ + ", ident, bpf_map_mmap_sz(map)); + mmap_data = bpf_map__initial_value(map, &mmap_size); + print_hex(mmap_data, mmap_size); + printf("\", %2$zd);\n" + "\tskel->maps.%1$s.initial_value = (__u64)(long)skel->%1$s;\n", + ident, mmap_size); + } + codegen("\ + \n\ + return skel; \n\ + cleanup: \n\ + %1$s__destroy(skel); \n\ + return NULL; \n\ + } \n\ + \n\ + static inline int \n\ + %1$s__load(struct %1$s *skel) \n\ + { \n\ + struct bpf_load_and_run_opts opts = {}; \n\ + int err; \n\ + \n\ + opts.ctx = (struct bpf_loader_ctx *)skel; \n\ + opts.data_sz = %2$d; \n\ + opts.data = (void *)\"\\ \n\ + ", + obj_name, opts.data_sz); + print_hex(opts.data, opts.data_sz); + codegen("\ + \n\ + \"; \n\ + "); + + codegen("\ + \n\ + opts.insns_sz = %d; \n\ + opts.insns = (void *)\"\\ \n\ + ", + opts.insns_sz); + print_hex(opts.insns, opts.insns_sz); + codegen("\ + \n\ + \"; \n\ + err = bpf_load_and_run(&opts); \n\ + if (err < 0) \n\ + return err; \n\ + ", obj_name); + bpf_object__for_each_map(map, obj) { + const char *ident, *mmap_flags; + + ident = get_map_ident(map); + if (!ident) + continue; + + if (!bpf_map__is_internal(map) || + !(bpf_map__def(map)->map_flags & BPF_F_MMAPABLE)) + continue; + if (bpf_map__def(map)->map_flags & BPF_F_RDONLY_PROG) + mmap_flags = "PROT_READ"; + else + mmap_flags = "PROT_READ | PROT_WRITE"; + + printf("\tskel->%1$s =\n" + "\t\tmmap(skel->%1$s, %2$zd, %3$s, MAP_SHARED | MAP_FIXED,\n" + "\t\t\tskel->maps.%1$s.map_fd, 0);\n", + ident, bpf_map_mmap_sz(map), mmap_flags); + } + codegen("\ + \n\ + return 0; \n\ + } \n\ + \n\ + static inline struct %1$s * \n\ + %1$s__open_and_load(void) \n\ + { \n\ + struct %1$s *skel; \n\ + \n\ + skel = %1$s__open(); \n\ + if (!skel) \n\ + return NULL; \n\ + if (%1$s__load(skel)) { \n\ + %1$s__destroy(skel); \n\ + return NULL; \n\ + } \n\ + return skel; \n\ + } \n\ + ", obj_name); + + codegen("\ + \n\ + \n\ + #endif /* %s */ \n\ + ", + header_guard); + err = 0; +out: + return err; +} + static int do_skeleton(int argc, char **argv) { char header_guard[MAX_OBJ_NAME_LEN + sizeof("__SKEL_H__")]; @@ -277,7 +605,7 @@ static int do_skeleton(int argc, char **argv) struct bpf_object *obj = NULL; const char *file, *ident; struct bpf_program *prog; - int fd, len, err = -1; + int fd, err = -1; struct bpf_map *map; struct btf *btf; struct stat st; @@ -359,7 +687,25 @@ static int do_skeleton(int argc, char **argv) } get_header_guard(header_guard, obj_name); - codegen("\ + if (use_loader) { + codegen("\ + \n\ + /* SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) */ \n\ + /* THIS FILE IS AUTOGENERATED! */ \n\ + #ifndef %2$s \n\ + #define %2$s \n\ + \n\ + #include <stdlib.h> \n\ + #include <bpf/bpf.h> \n\ + #include <bpf/skel_internal.h> \n\ + \n\ + struct %1$s { \n\ + struct bpf_loader_ctx ctx; \n\ + ", + obj_name, header_guard + ); + } else { + codegen("\ \n\ /* SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) */ \n\ \n\ @@ -367,6 +713,7 @@ static int do_skeleton(int argc, char **argv) #ifndef %2$s \n\ #define %2$s \n\ \n\ + #include <errno.h> \n\ #include <stdlib.h> \n\ #include <bpf/libbpf.h> \n\ \n\ @@ -375,7 +722,8 @@ static int do_skeleton(int argc, char **argv) struct bpf_object *obj; \n\ ", obj_name, header_guard - ); + ); + } if (map_cnt) { printf("\tstruct {\n"); @@ -383,7 +731,10 @@ static int do_skeleton(int argc, char **argv) ident = get_map_ident(map); if (!ident) continue; - printf("\t\tstruct bpf_map *%s;\n", ident); + if (use_loader) + printf("\t\tstruct bpf_map_desc %s;\n", ident); + else + printf("\t\tstruct bpf_map *%s;\n", ident); } printf("\t} maps;\n"); } @@ -391,14 +742,22 @@ static int do_skeleton(int argc, char **argv) if (prog_cnt) { printf("\tstruct {\n"); bpf_object__for_each_program(prog, obj) { - printf("\t\tstruct bpf_program *%s;\n", - bpf_program__name(prog)); + if (use_loader) + printf("\t\tstruct bpf_prog_desc %s;\n", + bpf_program__name(prog)); + else + printf("\t\tstruct bpf_program *%s;\n", + bpf_program__name(prog)); } printf("\t} progs;\n"); printf("\tstruct {\n"); bpf_object__for_each_program(prog, obj) { - printf("\t\tstruct bpf_link *%s;\n", - bpf_program__name(prog)); + if (use_loader) + printf("\t\tint %s_fd;\n", + bpf_program__name(prog)); + else + printf("\t\tstruct bpf_link *%s;\n", + bpf_program__name(prog)); } printf("\t} links;\n"); } @@ -409,6 +768,10 @@ static int do_skeleton(int argc, char **argv) if (err) goto out; } + if (use_loader) { + err = gen_trace(obj, obj_name, header_guard); + goto out; + } codegen("\ \n\ @@ -431,18 +794,23 @@ static int do_skeleton(int argc, char **argv) %1$s__open_opts(const struct bpf_object_open_opts *opts) \n\ { \n\ struct %1$s *obj; \n\ + int err; \n\ \n\ obj = (struct %1$s *)calloc(1, sizeof(*obj)); \n\ - if (!obj) \n\ + if (!obj) { \n\ + errno = ENOMEM; \n\ return NULL; \n\ - if (%1$s__create_skeleton(obj)) \n\ - goto err; \n\ - if (bpf_object__open_skeleton(obj->skeleton, opts)) \n\ - goto err; \n\ + } \n\ + \n\ + err = %1$s__create_skeleton(obj); \n\ + err = err ?: bpf_object__open_skeleton(obj->skeleton, opts);\n\ + if (err) \n\ + goto err_out; \n\ \n\ return obj; \n\ - err: \n\ + err_out: \n\ %1$s__destroy(obj); \n\ + errno = -err; \n\ return NULL; \n\ } \n\ \n\ @@ -462,12 +830,15 @@ static int do_skeleton(int argc, char **argv) %1$s__open_and_load(void) \n\ { \n\ struct %1$s *obj; \n\ + int err; \n\ \n\ obj = %1$s__open(); \n\ if (!obj) \n\ return NULL; \n\ - if (%1$s__load(obj)) { \n\ + err = %1$s__load(obj); \n\ + if (err) { \n\ %1$s__destroy(obj); \n\ + errno = -err; \n\ return NULL; \n\ } \n\ return obj; \n\ @@ -498,7 +869,7 @@ static int do_skeleton(int argc, char **argv) \n\ s = (struct bpf_object_skeleton *)calloc(1, sizeof(*s));\n\ if (!s) \n\ - return -1; \n\ + goto err; \n\ obj->skeleton = s; \n\ \n\ s->sz = sizeof(*s); \n\ @@ -578,19 +949,7 @@ static int do_skeleton(int argc, char **argv) file_sz); /* embed contents of BPF object file */ - for (i = 0, len = 0; i < file_sz; i++) { - int w = obj_data[i] ? 4 : 2; - - len += w; - if (len > 78) { - printf("\\\n"); - len = w; - } - if (!obj_data[i]) - printf("\\0"); - else - printf("\\x%02x", (unsigned char)obj_data[i]); - } + print_hex(obj_data, file_sz); codegen("\ \n\ @@ -599,7 +958,7 @@ static int do_skeleton(int argc, char **argv) return 0; \n\ err: \n\ bpf_object__destroy_skeleton(s); \n\ - return -1; \n\ + return -ENOMEM; \n\ } \n\ \n\ #endif /* %s */ \n\ @@ -636,7 +995,7 @@ static int do_object(int argc, char **argv) while (argc) { file = GET_ARG(); - err = bpf_linker__add_file(linker, file); + err = bpf_linker__add_file(linker, file, NULL); if (err) { p_err("failed to link '%s': %s (%d)", file, strerror(err), err); goto out; diff --git a/tools/bpf/bpftool/main.c b/tools/bpf/bpftool/main.c index d9afb730136a..3ddfd4843738 100644 --- a/tools/bpf/bpftool/main.c +++ b/tools/bpf/bpftool/main.c @@ -29,6 +29,7 @@ bool show_pinned; bool block_mount; bool verifier_logs; bool relaxed_maps; +bool use_loader; struct btf *base_btf; struct pinned_obj_table prog_table; struct pinned_obj_table map_table; @@ -340,8 +341,10 @@ static int do_batch(int argc, char **argv) n_argc = make_args(buf, n_argv, BATCH_ARG_NB_MAX, lines); if (!n_argc) continue; - if (n_argc < 0) + if (n_argc < 0) { + err = n_argc; goto err_close; + } if (json_output) { jsonw_start_object(json_wtr); @@ -392,6 +395,7 @@ int main(int argc, char **argv) { "mapcompat", no_argument, NULL, 'm' }, { "nomount", no_argument, NULL, 'n' }, { "debug", no_argument, NULL, 'd' }, + { "use-loader", no_argument, NULL, 'L' }, { "base-btf", required_argument, NULL, 'B' }, { 0 } }; @@ -409,7 +413,7 @@ int main(int argc, char **argv) hash_init(link_table.table); opterr = 0; - while ((opt = getopt_long(argc, argv, "VhpjfmndB:", + while ((opt = getopt_long(argc, argv, "VhpjfLmndB:", options, NULL)) >= 0) { switch (opt) { case 'V': @@ -452,6 +456,9 @@ int main(int argc, char **argv) return -1; } break; + case 'L': + use_loader = true; + break; default: p_err("unrecognized option '%s'", argv[optind - 1]); if (json_output) diff --git a/tools/bpf/bpftool/main.h b/tools/bpf/bpftool/main.h index 76e91641262b..c1cf29798b99 100644 --- a/tools/bpf/bpftool/main.h +++ b/tools/bpf/bpftool/main.h @@ -90,6 +90,7 @@ extern bool show_pids; extern bool block_mount; extern bool verifier_logs; extern bool relaxed_maps; +extern bool use_loader; extern struct btf *base_btf; extern struct pinned_obj_table prog_table; extern struct pinned_obj_table map_table; diff --git a/tools/bpf/bpftool/prog.c b/tools/bpf/bpftool/prog.c index 3f067d2d7584..cc48726740ad 100644 --- a/tools/bpf/bpftool/prog.c +++ b/tools/bpf/bpftool/prog.c @@ -16,6 +16,7 @@ #include <sys/types.h> #include <sys/stat.h> #include <sys/syscall.h> +#include <dirent.h> #include <linux/err.h> #include <linux/perf_event.h> @@ -24,6 +25,8 @@ #include <bpf/bpf.h> #include <bpf/btf.h> #include <bpf/libbpf.h> +#include <bpf/bpf_gen_internal.h> +#include <bpf/skel_internal.h> #include "cfg.h" #include "main.h" @@ -1499,7 +1502,7 @@ static int load_with_options(int argc, char **argv, bool first_prog_only) set_max_rlimit(); obj = bpf_object__open_file(file, &open_opts); - if (IS_ERR_OR_NULL(obj)) { + if (libbpf_get_error(obj)) { p_err("failed to open object file"); goto err_free_reuse_maps; } @@ -1645,8 +1648,110 @@ err_free_reuse_maps: return -1; } +static int count_open_fds(void) +{ + DIR *dp = opendir("/proc/self/fd"); + struct dirent *de; + int cnt = -3; + + if (!dp) + return -1; + + while ((de = readdir(dp))) + cnt++; + + closedir(dp); + return cnt; +} + +static int try_loader(struct gen_loader_opts *gen) +{ + struct bpf_load_and_run_opts opts = {}; + struct bpf_loader_ctx *ctx; + int ctx_sz = sizeof(*ctx) + 64 * max(sizeof(struct bpf_map_desc), + sizeof(struct bpf_prog_desc)); + int log_buf_sz = (1u << 24) - 1; + int err, fds_before, fd_delta; + char *log_buf; + + ctx = alloca(ctx_sz); + memset(ctx, 0, ctx_sz); + ctx->sz = ctx_sz; + ctx->log_level = 1; + ctx->log_size = log_buf_sz; + log_buf = malloc(log_buf_sz); + if (!log_buf) + return -ENOMEM; + ctx->log_buf = (long) log_buf; + opts.ctx = ctx; + opts.data = gen->data; + opts.data_sz = gen->data_sz; + opts.insns = gen->insns; + opts.insns_sz = gen->insns_sz; + fds_before = count_open_fds(); + err = bpf_load_and_run(&opts); + fd_delta = count_open_fds() - fds_before; + if (err < 0) { + fprintf(stderr, "err %d\n%s\n%s", err, opts.errstr, log_buf); + if (fd_delta) + fprintf(stderr, "loader prog leaked %d FDs\n", + fd_delta); + } + free(log_buf); + return err; +} + +static int do_loader(int argc, char **argv) +{ + DECLARE_LIBBPF_OPTS(bpf_object_open_opts, open_opts); + DECLARE_LIBBPF_OPTS(gen_loader_opts, gen); + struct bpf_object_load_attr load_attr = {}; + struct bpf_object *obj; + const char *file; + int err = 0; + + if (!REQ_ARGS(1)) + return -1; + file = GET_ARG(); + + obj = bpf_object__open_file(file, &open_opts); + if (libbpf_get_error(obj)) { + p_err("failed to open object file"); + goto err_close_obj; + } + + err = bpf_object__gen_loader(obj, &gen); + if (err) + goto err_close_obj; + + load_attr.obj = obj; + if (verifier_logs) + /* log_level1 + log_level2 + stats, but not stable UAPI */ + load_attr.log_level = 1 + 2 + 4; + + err = bpf_object__load_xattr(&load_attr); + if (err) { + p_err("failed to load object file"); + goto err_close_obj; + } + + if (verifier_logs) { + struct dump_data dd = {}; + + kernel_syms_load(&dd); + dump_xlated_plain(&dd, (void *)gen.insns, gen.insns_sz, false, false); + kernel_syms_destroy(&dd); + } + err = try_loader(&gen); +err_close_obj: + bpf_object__close(obj); + return err; +} + static int do_load(int argc, char **argv) { + if (use_loader) + return do_loader(argc, argv); return load_with_options(argc, argv, true); } @@ -2138,7 +2243,7 @@ static int do_help(int argc, char **argv) " cgroup/getpeername4 | cgroup/getpeername6 |\n" " cgroup/getsockname4 | cgroup/getsockname6 | cgroup/sendmsg4 |\n" " cgroup/sendmsg6 | cgroup/recvmsg4 | cgroup/recvmsg6 |\n" - " cgroup/getsockopt | cgroup/setsockopt |\n" + " cgroup/getsockopt | cgroup/setsockopt | cgroup/sock_release |\n" " struct_ops | fentry | fexit | freplace | sk_lookup }\n" " ATTACH_TYPE := { msg_verdict | stream_verdict | stream_parser |\n" " flow_dissector }\n" diff --git a/tools/bpf/bpftool/xlated_dumper.c b/tools/bpf/bpftool/xlated_dumper.c index 6fc3e6f7f40c..f1f32e21d5cd 100644 --- a/tools/bpf/bpftool/xlated_dumper.c +++ b/tools/bpf/bpftool/xlated_dumper.c @@ -196,6 +196,9 @@ static const char *print_imm(void *private_data, else if (insn->src_reg == BPF_PSEUDO_MAP_VALUE) snprintf(dd->scratch_buff, sizeof(dd->scratch_buff), "map[id:%u][0]+%u", insn->imm, (insn + 1)->imm); + else if (insn->src_reg == BPF_PSEUDO_MAP_IDX_VALUE) + snprintf(dd->scratch_buff, sizeof(dd->scratch_buff), + "map[idx:%u]+%u", insn->imm, (insn + 1)->imm); else if (insn->src_reg == BPF_PSEUDO_FUNC) snprintf(dd->scratch_buff, sizeof(dd->scratch_buff), "subprog[%+d]", insn->imm); diff --git a/tools/build/Makefile.build b/tools/build/Makefile.build index cd72016c3cfa..715092fc6a23 100644 --- a/tools/build/Makefile.build +++ b/tools/build/Makefile.build @@ -51,39 +51,39 @@ subdir-obj-y := build-file := $(dir)/Build -include $(build-file) -quiet_cmd_flex = FLEX $@ -quiet_cmd_bison = BISON $@ +quiet_cmd_flex = FLEX $@ +quiet_cmd_bison = BISON $@ # Create directory unless it exists -quiet_cmd_mkdir = MKDIR $(dir $@) +quiet_cmd_mkdir = MKDIR $(dir $@) cmd_mkdir = mkdir -p $(dir $@) rule_mkdir = $(if $(wildcard $(dir $@)),,@$(call echo-cmd,mkdir) $(cmd_mkdir)) # Compile command -quiet_cmd_cc_o_c = CC $@ +quiet_cmd_cc_o_c = CC $@ cmd_cc_o_c = $(CC) $(c_flags) -c -o $@ $< -quiet_cmd_host_cc_o_c = HOSTCC $@ +quiet_cmd_host_cc_o_c = HOSTCC $@ cmd_host_cc_o_c = $(HOSTCC) $(host_c_flags) -c -o $@ $< -quiet_cmd_cxx_o_c = CXX $@ +quiet_cmd_cxx_o_c = CXX $@ cmd_cxx_o_c = $(CXX) $(cxx_flags) -c -o $@ $< -quiet_cmd_cpp_i_c = CPP $@ +quiet_cmd_cpp_i_c = CPP $@ cmd_cpp_i_c = $(CC) $(c_flags) -E -o $@ $< -quiet_cmd_cc_s_c = AS $@ +quiet_cmd_cc_s_c = AS $@ cmd_cc_s_c = $(CC) $(c_flags) -S -o $@ $< -quiet_cmd_gen = GEN $@ +quiet_cmd_gen = GEN $@ # Link agregate command # If there's nothing to link, create empty $@ object. -quiet_cmd_ld_multi = LD $@ +quiet_cmd_ld_multi = LD $@ cmd_ld_multi = $(if $(strip $(obj-y)),\ $(LD) -r -o $@ $(filter $(obj-y),$^),rm -f $@; $(AR) rcs $@) -quiet_cmd_host_ld_multi = HOSTLD $@ +quiet_cmd_host_ld_multi = HOSTLD $@ cmd_host_ld_multi = $(if $(strip $(obj-y)),\ $(HOSTLD) -r -o $@ $(filter $(obj-y),$^),rm -f $@; $(HOSTAR) rcs $@) diff --git a/tools/include/asm/alternative-asm.h b/tools/include/asm/alternative.h index b54bd860dff6..b54bd860dff6 100644 --- a/tools/include/asm/alternative-asm.h +++ b/tools/include/asm/alternative.h diff --git a/tools/include/linux/bits.h b/tools/include/linux/bits.h index 7f475d59a097..87d112650dfb 100644 --- a/tools/include/linux/bits.h +++ b/tools/include/linux/bits.h @@ -22,7 +22,7 @@ #include <linux/build_bug.h> #define GENMASK_INPUT_CHECK(h, l) \ (BUILD_BUG_ON_ZERO(__builtin_choose_expr( \ - __builtin_constant_p((l) > (h)), (l) > (h), 0))) + __is_constexpr((l) > (h)), (l) > (h), 0))) #else /* * BUILD_BUG_ON_ZERO is not available in h files included from asm files, diff --git a/tools/include/linux/const.h b/tools/include/linux/const.h index 81b8aae5a855..435ddd72d2c4 100644 --- a/tools/include/linux/const.h +++ b/tools/include/linux/const.h @@ -3,4 +3,12 @@ #include <vdso/const.h> +/* + * This returns a constant expression while determining if an argument is + * a constant expression, most importantly without evaluating the argument. + * Glory to Martin Uecker <Martin.Uecker@med.uni-goettingen.de> + */ +#define __is_constexpr(x) \ + (sizeof(int) == sizeof(*(8 ? ((void *)((long)(x) * 0l)) : (int *)8))) + #endif /* _LINUX_CONST_H */ diff --git a/tools/include/uapi/asm-generic/unistd.h b/tools/include/uapi/asm-generic/unistd.h index ce58cff99b66..6de5a7fc066b 100644 --- a/tools/include/uapi/asm-generic/unistd.h +++ b/tools/include/uapi/asm-generic/unistd.h @@ -863,9 +863,18 @@ __SYSCALL(__NR_process_madvise, sys_process_madvise) __SC_COMP(__NR_epoll_pwait2, sys_epoll_pwait2, compat_sys_epoll_pwait2) #define __NR_mount_setattr 442 __SYSCALL(__NR_mount_setattr, sys_mount_setattr) +#define __NR_quotactl_path 443 +__SYSCALL(__NR_quotactl_path, sys_quotactl_path) + +#define __NR_landlock_create_ruleset 444 +__SYSCALL(__NR_landlock_create_ruleset, sys_landlock_create_ruleset) +#define __NR_landlock_add_rule 445 +__SYSCALL(__NR_landlock_add_rule, sys_landlock_add_rule) +#define __NR_landlock_restrict_self 446 +__SYSCALL(__NR_landlock_restrict_self, sys_landlock_restrict_self) #undef __NR_syscalls -#define __NR_syscalls 443 +#define __NR_syscalls 447 /* * 32 bit systems traditionally used different diff --git a/tools/include/uapi/drm/drm.h b/tools/include/uapi/drm/drm.h index 0827037c5484..67b94bc3c885 100644 --- a/tools/include/uapi/drm/drm.h +++ b/tools/include/uapi/drm/drm.h @@ -625,30 +625,147 @@ struct drm_gem_open { __u64 size; }; +/** + * DRM_CAP_DUMB_BUFFER + * + * If set to 1, the driver supports creating dumb buffers via the + * &DRM_IOCTL_MODE_CREATE_DUMB ioctl. + */ #define DRM_CAP_DUMB_BUFFER 0x1 +/** + * DRM_CAP_VBLANK_HIGH_CRTC + * + * If set to 1, the kernel supports specifying a CRTC index in the high bits of + * &drm_wait_vblank_request.type. + * + * Starting kernel version 2.6.39, this capability is always set to 1. + */ #define DRM_CAP_VBLANK_HIGH_CRTC 0x2 +/** + * DRM_CAP_DUMB_PREFERRED_DEPTH + * + * The preferred bit depth for dumb buffers. + * + * The bit depth is the number of bits used to indicate the color of a single + * pixel excluding any padding. This is different from the number of bits per + * pixel. For instance, XRGB8888 has a bit depth of 24 but has 32 bits per + * pixel. + * + * Note that this preference only applies to dumb buffers, it's irrelevant for + * other types of buffers. + */ #define DRM_CAP_DUMB_PREFERRED_DEPTH 0x3 +/** + * DRM_CAP_DUMB_PREFER_SHADOW + * + * If set to 1, the driver prefers userspace to render to a shadow buffer + * instead of directly rendering to a dumb buffer. For best speed, userspace + * should do streaming ordered memory copies into the dumb buffer and never + * read from it. + * + * Note that this preference only applies to dumb buffers, it's irrelevant for + * other types of buffers. + */ #define DRM_CAP_DUMB_PREFER_SHADOW 0x4 +/** + * DRM_CAP_PRIME + * + * Bitfield of supported PRIME sharing capabilities. See &DRM_PRIME_CAP_IMPORT + * and &DRM_PRIME_CAP_EXPORT. + * + * PRIME buffers are exposed as dma-buf file descriptors. See + * Documentation/gpu/drm-mm.rst, section "PRIME Buffer Sharing". + */ #define DRM_CAP_PRIME 0x5 +/** + * DRM_PRIME_CAP_IMPORT + * + * If this bit is set in &DRM_CAP_PRIME, the driver supports importing PRIME + * buffers via the &DRM_IOCTL_PRIME_FD_TO_HANDLE ioctl. + */ #define DRM_PRIME_CAP_IMPORT 0x1 +/** + * DRM_PRIME_CAP_EXPORT + * + * If this bit is set in &DRM_CAP_PRIME, the driver supports exporting PRIME + * buffers via the &DRM_IOCTL_PRIME_HANDLE_TO_FD ioctl. + */ #define DRM_PRIME_CAP_EXPORT 0x2 +/** + * DRM_CAP_TIMESTAMP_MONOTONIC + * + * If set to 0, the kernel will report timestamps with ``CLOCK_REALTIME`` in + * struct drm_event_vblank. If set to 1, the kernel will report timestamps with + * ``CLOCK_MONOTONIC``. See ``clock_gettime(2)`` for the definition of these + * clocks. + * + * Starting from kernel version 2.6.39, the default value for this capability + * is 1. Starting kernel version 4.15, this capability is always set to 1. + */ #define DRM_CAP_TIMESTAMP_MONOTONIC 0x6 +/** + * DRM_CAP_ASYNC_PAGE_FLIP + * + * If set to 1, the driver supports &DRM_MODE_PAGE_FLIP_ASYNC. + */ #define DRM_CAP_ASYNC_PAGE_FLIP 0x7 -/* - * The CURSOR_WIDTH and CURSOR_HEIGHT capabilities return a valid widthxheight - * combination for the hardware cursor. The intention is that a hardware - * agnostic userspace can query a cursor plane size to use. +/** + * DRM_CAP_CURSOR_WIDTH + * + * The ``CURSOR_WIDTH`` and ``CURSOR_HEIGHT`` capabilities return a valid + * width x height combination for the hardware cursor. The intention is that a + * hardware agnostic userspace can query a cursor plane size to use. * * Note that the cross-driver contract is to merely return a valid size; * drivers are free to attach another meaning on top, eg. i915 returns the * maximum plane size. */ #define DRM_CAP_CURSOR_WIDTH 0x8 +/** + * DRM_CAP_CURSOR_HEIGHT + * + * See &DRM_CAP_CURSOR_WIDTH. + */ #define DRM_CAP_CURSOR_HEIGHT 0x9 +/** + * DRM_CAP_ADDFB2_MODIFIERS + * + * If set to 1, the driver supports supplying modifiers in the + * &DRM_IOCTL_MODE_ADDFB2 ioctl. + */ #define DRM_CAP_ADDFB2_MODIFIERS 0x10 +/** + * DRM_CAP_PAGE_FLIP_TARGET + * + * If set to 1, the driver supports the &DRM_MODE_PAGE_FLIP_TARGET_ABSOLUTE and + * &DRM_MODE_PAGE_FLIP_TARGET_RELATIVE flags in + * &drm_mode_crtc_page_flip_target.flags for the &DRM_IOCTL_MODE_PAGE_FLIP + * ioctl. + */ #define DRM_CAP_PAGE_FLIP_TARGET 0x11 +/** + * DRM_CAP_CRTC_IN_VBLANK_EVENT + * + * If set to 1, the kernel supports reporting the CRTC ID in + * &drm_event_vblank.crtc_id for the &DRM_EVENT_VBLANK and + * &DRM_EVENT_FLIP_COMPLETE events. + * + * Starting kernel version 4.12, this capability is always set to 1. + */ #define DRM_CAP_CRTC_IN_VBLANK_EVENT 0x12 +/** + * DRM_CAP_SYNCOBJ + * + * If set to 1, the driver supports sync objects. See + * Documentation/gpu/drm-mm.rst, section "DRM Sync Objects". + */ #define DRM_CAP_SYNCOBJ 0x13 +/** + * DRM_CAP_SYNCOBJ_TIMELINE + * + * If set to 1, the driver supports timeline operations on sync objects. See + * Documentation/gpu/drm-mm.rst, section "DRM Sync Objects". + */ #define DRM_CAP_SYNCOBJ_TIMELINE 0x14 /* DRM_IOCTL_GET_CAP ioctl argument type */ diff --git a/tools/include/uapi/drm/i915_drm.h b/tools/include/uapi/drm/i915_drm.h index 1987e2ea79a3..ddc47bbf48b6 100644 --- a/tools/include/uapi/drm/i915_drm.h +++ b/tools/include/uapi/drm/i915_drm.h @@ -943,6 +943,7 @@ struct drm_i915_gem_exec_object { __u64 offset; }; +/* DRM_IOCTL_I915_GEM_EXECBUFFER was removed in Linux 5.13 */ struct drm_i915_gem_execbuffer { /** * List of buffers to be validated with their relocations to be diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h index ec6d85a81744..bf9252c7381e 100644 --- a/tools/include/uapi/linux/bpf.h +++ b/tools/include/uapi/linux/bpf.h @@ -527,6 +527,15 @@ union bpf_iter_link_info { * Look up an element with the given *key* in the map referred to * by the file descriptor *fd*, and if found, delete the element. * + * For **BPF_MAP_TYPE_QUEUE** and **BPF_MAP_TYPE_STACK** map + * types, the *flags* argument needs to be set to 0, but for other + * map types, it may be specified as: + * + * **BPF_F_LOCK** + * Look up and delete the value of a spin-locked map + * without returning the lock. This must be specified if + * the elements contain a spinlock. + * * The **BPF_MAP_TYPE_QUEUE** and **BPF_MAP_TYPE_STACK** map types * implement this command as a "pop" operation, deleting the top * element rather than one corresponding to *key*. @@ -536,6 +545,10 @@ union bpf_iter_link_info { * This command is only valid for the following map types: * * **BPF_MAP_TYPE_QUEUE** * * **BPF_MAP_TYPE_STACK** + * * **BPF_MAP_TYPE_HASH** + * * **BPF_MAP_TYPE_PERCPU_HASH** + * * **BPF_MAP_TYPE_LRU_HASH** + * * **BPF_MAP_TYPE_LRU_PERCPU_HASH** * * Return * Returns zero on success. On error, -1 is returned and *errno* @@ -837,6 +850,7 @@ enum bpf_cmd { BPF_PROG_ATTACH, BPF_PROG_DETACH, BPF_PROG_TEST_RUN, + BPF_PROG_RUN = BPF_PROG_TEST_RUN, BPF_PROG_GET_NEXT_ID, BPF_MAP_GET_NEXT_ID, BPF_PROG_GET_FD_BY_ID, @@ -937,6 +951,7 @@ enum bpf_prog_type { BPF_PROG_TYPE_EXT, BPF_PROG_TYPE_LSM, BPF_PROG_TYPE_SK_LOOKUP, + BPF_PROG_TYPE_SYSCALL, /* a program that can execute syscalls */ }; enum bpf_attach_type { @@ -979,6 +994,8 @@ enum bpf_attach_type { BPF_SK_LOOKUP, BPF_XDP, BPF_SK_SKB_VERDICT, + BPF_SK_REUSEPORT_SELECT, + BPF_SK_REUSEPORT_SELECT_OR_MIGRATE, __MAX_BPF_ATTACH_TYPE }; @@ -1097,8 +1114,8 @@ enum bpf_link_type { /* When BPF ldimm64's insn[0].src_reg != 0 then this can have * the following extensions: * - * insn[0].src_reg: BPF_PSEUDO_MAP_FD - * insn[0].imm: map fd + * insn[0].src_reg: BPF_PSEUDO_MAP_[FD|IDX] + * insn[0].imm: map fd or fd_idx * insn[1].imm: 0 * insn[0].off: 0 * insn[1].off: 0 @@ -1106,15 +1123,19 @@ enum bpf_link_type { * verifier type: CONST_PTR_TO_MAP */ #define BPF_PSEUDO_MAP_FD 1 -/* insn[0].src_reg: BPF_PSEUDO_MAP_VALUE - * insn[0].imm: map fd +#define BPF_PSEUDO_MAP_IDX 5 + +/* insn[0].src_reg: BPF_PSEUDO_MAP_[IDX_]VALUE + * insn[0].imm: map fd or fd_idx * insn[1].imm: offset into value * insn[0].off: 0 * insn[1].off: 0 * ldimm64 rewrite: address of map[0]+offset * verifier type: PTR_TO_MAP_VALUE */ -#define BPF_PSEUDO_MAP_VALUE 2 +#define BPF_PSEUDO_MAP_VALUE 2 +#define BPF_PSEUDO_MAP_IDX_VALUE 6 + /* insn[0].src_reg: BPF_PSEUDO_BTF_ID * insn[0].imm: kernel btd id of VAR * insn[1].imm: 0 @@ -1314,6 +1335,8 @@ union bpf_attr { /* or valid module BTF object fd or 0 to attach to vmlinux */ __u32 attach_btf_obj_fd; }; + __u32 :32; /* pad */ + __aligned_u64 fd_array; /* array of FDs */ }; struct { /* anonymous struct used by BPF_OBJ_* commands */ @@ -2534,8 +2557,12 @@ union bpf_attr { * The lower two bits of *flags* are used as the return code if * the map lookup fails. This is so that the return value can be * one of the XDP program return codes up to **XDP_TX**, as chosen - * by the caller. Any higher bits in the *flags* argument must be - * unset. + * by the caller. The higher bits of *flags* can be set to + * BPF_F_BROADCAST or BPF_F_EXCLUDE_INGRESS as defined below. + * + * With BPF_F_BROADCAST the packet will be broadcasted to all the + * interfaces in the map, with BPF_F_EXCLUDE_INGRESS the ingress + * interface will be excluded when do broadcasting. * * See also **bpf_redirect**\ (), which only supports redirecting * to an ifindex, but doesn't require a map to do so. @@ -4735,6 +4762,24 @@ union bpf_attr { * be zero-terminated except when **str_size** is 0. * * Or **-EBUSY** if the per-CPU memory copy buffer is busy. + * + * long bpf_sys_bpf(u32 cmd, void *attr, u32 attr_size) + * Description + * Execute bpf syscall with given arguments. + * Return + * A syscall result. + * + * long bpf_btf_find_by_name_kind(char *name, int name_sz, u32 kind, int flags) + * Description + * Find BTF type with given name and kind in vmlinux BTF or in module's BTFs. + * Return + * Returns btf_id and btf_obj_fd in lower and upper 32 bits. + * + * long bpf_sys_close(u32 fd) + * Description + * Execute close syscall for given FD. + * Return + * A syscall result. */ #define __BPF_FUNC_MAPPER(FN) \ FN(unspec), \ @@ -4903,6 +4948,9 @@ union bpf_attr { FN(check_mtu), \ FN(for_each_map_elem), \ FN(snprintf), \ + FN(sys_bpf), \ + FN(btf_find_by_name_kind), \ + FN(sys_close), \ /* */ /* integer value in 'imm' field of BPF_CALL instruction selects which helper @@ -5080,6 +5128,12 @@ enum { BPF_F_BPRM_SECUREEXEC = (1ULL << 0), }; +/* Flags for bpf_redirect_map helper */ +enum { + BPF_F_BROADCAST = (1ULL << 3), + BPF_F_EXCLUDE_INGRESS = (1ULL << 4), +}; + #define __bpf_md_ptr(type, name) \ union { \ type name; \ @@ -5364,6 +5418,20 @@ struct sk_reuseport_md { __u32 ip_protocol; /* IP protocol. e.g. IPPROTO_TCP, IPPROTO_UDP */ __u32 bind_inany; /* Is sock bound to an INANY address? */ __u32 hash; /* A hash of the packet 4 tuples */ + /* When reuse->migrating_sk is NULL, it is selecting a sk for the + * new incoming connection request (e.g. selecting a listen sk for + * the received SYN in the TCP case). reuse->sk is one of the sk + * in the reuseport group. The bpf prog can use reuse->sk to learn + * the local listening ip/port without looking into the skb. + * + * When reuse->migrating_sk is not NULL, reuse->sk is closed and + * reuse->migrating_sk is the socket that needs to be migrated + * to another listening socket. migrating_sk could be a fullsock + * sk that is fully established or a reqsk that is in-the-middle + * of 3-way handshake. + */ + __bpf_md_ptr(struct bpf_sock *, sk); + __bpf_md_ptr(struct bpf_sock *, migrating_sk); }; #define BPF_TAG_SIZE 8 diff --git a/tools/include/uapi/linux/fs.h b/tools/include/uapi/linux/fs.h index f44eb0a04afd..4c32e97dcdf0 100644 --- a/tools/include/uapi/linux/fs.h +++ b/tools/include/uapi/linux/fs.h @@ -185,7 +185,7 @@ struct fsxattr { #define BLKROTATIONAL _IO(0x12,126) #define BLKZEROOUT _IO(0x12,127) /* - * A jump here: 130-131 are reserved for zoned block devices + * A jump here: 130-136 are reserved for zoned block devices * (see uapi/linux/blkzoned.h) */ diff --git a/tools/include/uapi/linux/kvm.h b/tools/include/uapi/linux/kvm.h index f6afee209620..79d9c44d1ad7 100644 --- a/tools/include/uapi/linux/kvm.h +++ b/tools/include/uapi/linux/kvm.h @@ -8,6 +8,7 @@ * Note: you must update KVM_API_VERSION if you change this interface. */ +#include <linux/const.h> #include <linux/types.h> #include <linux/compiler.h> #include <linux/ioctl.h> @@ -1078,6 +1079,10 @@ struct kvm_ppc_resize_hpt { #define KVM_CAP_DIRTY_LOG_RING 192 #define KVM_CAP_X86_BUS_LOCK_EXIT 193 #define KVM_CAP_PPC_DAWR1 194 +#define KVM_CAP_SET_GUEST_DEBUG2 195 +#define KVM_CAP_SGX_ATTRIBUTE 196 +#define KVM_CAP_VM_COPY_ENC_CONTEXT_FROM 197 +#define KVM_CAP_PTP_KVM 198 #ifdef KVM_CAP_IRQ_ROUTING @@ -1671,6 +1676,8 @@ enum sev_cmd_id { KVM_SEV_CERT_EXPORT, /* Attestation report */ KVM_SEV_GET_ATTESTATION_REPORT, + /* Guest Migration Extension */ + KVM_SEV_SEND_CANCEL, KVM_SEV_NR_MAX, }; @@ -1729,6 +1736,45 @@ struct kvm_sev_attestation_report { __u32 len; }; +struct kvm_sev_send_start { + __u32 policy; + __u64 pdh_cert_uaddr; + __u32 pdh_cert_len; + __u64 plat_certs_uaddr; + __u32 plat_certs_len; + __u64 amd_certs_uaddr; + __u32 amd_certs_len; + __u64 session_uaddr; + __u32 session_len; +}; + +struct kvm_sev_send_update_data { + __u64 hdr_uaddr; + __u32 hdr_len; + __u64 guest_uaddr; + __u32 guest_len; + __u64 trans_uaddr; + __u32 trans_len; +}; + +struct kvm_sev_receive_start { + __u32 handle; + __u32 policy; + __u64 pdh_uaddr; + __u32 pdh_len; + __u64 session_uaddr; + __u32 session_len; +}; + +struct kvm_sev_receive_update_data { + __u64 hdr_uaddr; + __u32 hdr_len; + __u64 guest_uaddr; + __u32 guest_len; + __u64 trans_uaddr; + __u32 trans_len; +}; + #define KVM_DEV_ASSIGN_ENABLE_IOMMU (1 << 0) #define KVM_DEV_ASSIGN_PCI_2_3 (1 << 1) #define KVM_DEV_ASSIGN_MASK_INTX (1 << 2) @@ -1834,8 +1880,8 @@ struct kvm_hyperv_eventfd { * conversion after harvesting an entry. Also, it must not skip any * dirty bits, so that dirty bits are always harvested in sequence. */ -#define KVM_DIRTY_GFN_F_DIRTY BIT(0) -#define KVM_DIRTY_GFN_F_RESET BIT(1) +#define KVM_DIRTY_GFN_F_DIRTY _BITUL(0) +#define KVM_DIRTY_GFN_F_RESET _BITUL(1) #define KVM_DIRTY_GFN_F_MASK 0x3 /* diff --git a/tools/include/uapi/linux/perf_event.h b/tools/include/uapi/linux/perf_event.h index 14332f4cf816..f92880a15645 100644 --- a/tools/include/uapi/linux/perf_event.h +++ b/tools/include/uapi/linux/perf_event.h @@ -127,6 +127,7 @@ enum perf_sw_ids { PERF_COUNT_SW_EMULATION_FAULTS = 8, PERF_COUNT_SW_DUMMY = 9, PERF_COUNT_SW_BPF_OUTPUT = 10, + PERF_COUNT_SW_CGROUP_SWITCHES = 11, PERF_COUNT_SW_MAX, /* non-ABI */ }; @@ -326,6 +327,7 @@ enum perf_event_read_format { #define PERF_ATTR_SIZE_VER4 104 /* add: sample_regs_intr */ #define PERF_ATTR_SIZE_VER5 112 /* add: aux_watermark */ #define PERF_ATTR_SIZE_VER6 120 /* add: aux_sample_size */ +#define PERF_ATTR_SIZE_VER7 128 /* add: sig_data */ /* * Hardware event_id to monitor via a performance monitoring event: @@ -404,7 +406,10 @@ struct perf_event_attr { cgroup : 1, /* include cgroup events */ text_poke : 1, /* include text poke events */ build_id : 1, /* use build id in mmap2 events */ - __reserved_1 : 29; + inherit_thread : 1, /* children only inherit if cloned with CLONE_THREAD */ + remove_on_exec : 1, /* event is removed from task on exec */ + sigtrap : 1, /* send synchronous SIGTRAP on event */ + __reserved_1 : 26; union { __u32 wakeup_events; /* wakeup every n events */ @@ -456,6 +461,12 @@ struct perf_event_attr { __u16 __reserved_2; __u32 aux_sample_size; __u32 __reserved_3; + + /* + * User provided data if sigtrap=1, passed back to user via + * siginfo_t::si_perf_data, e.g. to permit user to identify the event. + */ + __u64 sig_data; }; /* @@ -1171,10 +1182,15 @@ enum perf_callchain_context { /** * PERF_RECORD_AUX::flags bits */ -#define PERF_AUX_FLAG_TRUNCATED 0x01 /* record was truncated to fit */ -#define PERF_AUX_FLAG_OVERWRITE 0x02 /* snapshot from overwrite mode */ -#define PERF_AUX_FLAG_PARTIAL 0x04 /* record contains gaps */ -#define PERF_AUX_FLAG_COLLISION 0x08 /* sample collided with another */ +#define PERF_AUX_FLAG_TRUNCATED 0x01 /* record was truncated to fit */ +#define PERF_AUX_FLAG_OVERWRITE 0x02 /* snapshot from overwrite mode */ +#define PERF_AUX_FLAG_PARTIAL 0x04 /* record contains gaps */ +#define PERF_AUX_FLAG_COLLISION 0x08 /* sample collided with another */ +#define PERF_AUX_FLAG_PMU_FORMAT_TYPE_MASK 0xff00 /* PMU specific trace format type */ + +/* CoreSight PMU AUX buffer formats */ +#define PERF_AUX_FLAG_CORESIGHT_FORMAT_CORESIGHT 0x0000 /* Default for backward compatibility */ +#define PERF_AUX_FLAG_CORESIGHT_FORMAT_RAW 0x0100 /* Raw format of the source */ #define PERF_FLAG_FD_NO_GROUP (1UL << 0) #define PERF_FLAG_FD_OUTPUT (1UL << 1) diff --git a/tools/include/uapi/linux/prctl.h b/tools/include/uapi/linux/prctl.h index 667f1aed091c..18a9f59dc067 100644 --- a/tools/include/uapi/linux/prctl.h +++ b/tools/include/uapi/linux/prctl.h @@ -255,4 +255,8 @@ struct prctl_mm_map { # define SYSCALL_DISPATCH_FILTER_ALLOW 0 # define SYSCALL_DISPATCH_FILTER_BLOCK 1 +/* Set/get enabled arm64 pointer authentication keys */ +#define PR_PAC_SET_ENABLED_KEYS 60 +#define PR_PAC_GET_ENABLED_KEYS 61 + #endif /* _LINUX_PRCTL_H */ diff --git a/tools/kvm/kvm_stat/kvm_stat.txt b/tools/kvm/kvm_stat/kvm_stat.txt index feaf46451e83..3a9f2037bd23 100644 --- a/tools/kvm/kvm_stat/kvm_stat.txt +++ b/tools/kvm/kvm_stat/kvm_stat.txt @@ -111,7 +111,7 @@ OPTIONS --tracepoints:: retrieve statistics from tracepoints -*z*:: +-z:: --skip-zero-records:: omit records with all zeros in logging mode diff --git a/tools/lib/bpf/Build b/tools/lib/bpf/Build index 9b057cc7650a..430f6874fa41 100644 --- a/tools/lib/bpf/Build +++ b/tools/lib/bpf/Build @@ -1,3 +1,3 @@ libbpf-y := libbpf.o bpf.o nlattr.o btf.o libbpf_errno.o str_error.o \ netlink.o bpf_prog_linfo.o libbpf_probes.o xsk.o hashmap.o \ - btf_dump.o ringbuf.o strset.o linker.o + btf_dump.o ringbuf.o strset.o linker.o gen_loader.o diff --git a/tools/lib/bpf/Makefile b/tools/lib/bpf/Makefile index e43e1896cb4b..ec14aa725bb0 100644 --- a/tools/lib/bpf/Makefile +++ b/tools/lib/bpf/Makefile @@ -223,18 +223,14 @@ install_lib: all_cmd $(call do_install_mkdir,$(libdir_SQ)); \ cp -fpR $(LIB_FILE) $(DESTDIR)$(libdir_SQ) +INSTALL_HEADERS = bpf.h libbpf.h btf.h libbpf_common.h libbpf_legacy.h xsk.h \ + bpf_helpers.h $(BPF_HELPER_DEFS) bpf_tracing.h \ + bpf_endian.h bpf_core_read.h skel_internal.h + install_headers: $(BPF_HELPER_DEFS) - $(call QUIET_INSTALL, headers) \ - $(call do_install,bpf.h,$(prefix)/include/bpf,644); \ - $(call do_install,libbpf.h,$(prefix)/include/bpf,644); \ - $(call do_install,btf.h,$(prefix)/include/bpf,644); \ - $(call do_install,libbpf_common.h,$(prefix)/include/bpf,644); \ - $(call do_install,xsk.h,$(prefix)/include/bpf,644); \ - $(call do_install,bpf_helpers.h,$(prefix)/include/bpf,644); \ - $(call do_install,$(BPF_HELPER_DEFS),$(prefix)/include/bpf,644); \ - $(call do_install,bpf_tracing.h,$(prefix)/include/bpf,644); \ - $(call do_install,bpf_endian.h,$(prefix)/include/bpf,644); \ - $(call do_install,bpf_core_read.h,$(prefix)/include/bpf,644); + $(call QUIET_INSTALL, headers) \ + $(foreach hdr,$(INSTALL_HEADERS), \ + $(call do_install,$(hdr),$(prefix)/include/bpf,644);) install_pkgconfig: $(PC_FILE) $(call QUIET_INSTALL, $(PC_FILE)) \ diff --git a/tools/lib/bpf/bpf.c b/tools/lib/bpf/bpf.c index bba48ff4c5c0..86dcac44f32f 100644 --- a/tools/lib/bpf/bpf.c +++ b/tools/lib/bpf/bpf.c @@ -80,6 +80,7 @@ static inline int sys_bpf_prog_load(union bpf_attr *attr, unsigned int size) int bpf_create_map_xattr(const struct bpf_create_map_attr *create_attr) { union bpf_attr attr; + int fd; memset(&attr, '\0', sizeof(attr)); @@ -102,7 +103,8 @@ int bpf_create_map_xattr(const struct bpf_create_map_attr *create_attr) else attr.inner_map_fd = create_attr->inner_map_fd; - return sys_bpf(BPF_MAP_CREATE, &attr, sizeof(attr)); + fd = sys_bpf(BPF_MAP_CREATE, &attr, sizeof(attr)); + return libbpf_err_errno(fd); } int bpf_create_map_node(enum bpf_map_type map_type, const char *name, @@ -160,6 +162,7 @@ int bpf_create_map_in_map_node(enum bpf_map_type map_type, const char *name, __u32 map_flags, int node) { union bpf_attr attr; + int fd; memset(&attr, '\0', sizeof(attr)); @@ -178,7 +181,8 @@ int bpf_create_map_in_map_node(enum bpf_map_type map_type, const char *name, attr.numa_node = node; } - return sys_bpf(BPF_MAP_CREATE, &attr, sizeof(attr)); + fd = sys_bpf(BPF_MAP_CREATE, &attr, sizeof(attr)); + return libbpf_err_errno(fd); } int bpf_create_map_in_map(enum bpf_map_type map_type, const char *name, @@ -222,10 +226,10 @@ int libbpf__bpf_prog_load(const struct bpf_prog_load_params *load_attr) int fd; if (!load_attr->log_buf != !load_attr->log_buf_sz) - return -EINVAL; + return libbpf_err(-EINVAL); if (load_attr->log_level > (4 | 2 | 1) || (load_attr->log_level && !load_attr->log_buf)) - return -EINVAL; + return libbpf_err(-EINVAL); memset(&attr, 0, sizeof(attr)); attr.prog_type = load_attr->prog_type; @@ -281,8 +285,10 @@ int libbpf__bpf_prog_load(const struct bpf_prog_load_params *load_attr) load_attr->func_info_cnt, load_attr->func_info_rec_size, attr.func_info_rec_size); - if (!finfo) + if (!finfo) { + errno = E2BIG; goto done; + } attr.func_info = ptr_to_u64(finfo); attr.func_info_rec_size = load_attr->func_info_rec_size; @@ -293,8 +299,10 @@ int libbpf__bpf_prog_load(const struct bpf_prog_load_params *load_attr) load_attr->line_info_cnt, load_attr->line_info_rec_size, attr.line_info_rec_size); - if (!linfo) + if (!linfo) { + errno = E2BIG; goto done; + } attr.line_info = ptr_to_u64(linfo); attr.line_info_rec_size = load_attr->line_info_rec_size; @@ -318,9 +326,10 @@ int libbpf__bpf_prog_load(const struct bpf_prog_load_params *load_attr) fd = sys_bpf_prog_load(&attr, sizeof(attr)); done: + /* free() doesn't affect errno, so we don't need to restore it */ free(finfo); free(linfo); - return fd; + return libbpf_err_errno(fd); } int bpf_load_program_xattr(const struct bpf_load_program_attr *load_attr, @@ -329,7 +338,7 @@ int bpf_load_program_xattr(const struct bpf_load_program_attr *load_attr, struct bpf_prog_load_params p = {}; if (!load_attr || !log_buf != !log_buf_sz) - return -EINVAL; + return libbpf_err(-EINVAL); p.prog_type = load_attr->prog_type; p.expected_attach_type = load_attr->expected_attach_type; @@ -391,6 +400,7 @@ int bpf_verify_program(enum bpf_prog_type type, const struct bpf_insn *insns, int log_level) { union bpf_attr attr; + int fd; memset(&attr, 0, sizeof(attr)); attr.prog_type = type; @@ -404,13 +414,15 @@ int bpf_verify_program(enum bpf_prog_type type, const struct bpf_insn *insns, attr.kern_version = kern_version; attr.prog_flags = prog_flags; - return sys_bpf_prog_load(&attr, sizeof(attr)); + fd = sys_bpf_prog_load(&attr, sizeof(attr)); + return libbpf_err_errno(fd); } int bpf_map_update_elem(int fd, const void *key, const void *value, __u64 flags) { union bpf_attr attr; + int ret; memset(&attr, 0, sizeof(attr)); attr.map_fd = fd; @@ -418,24 +430,28 @@ int bpf_map_update_elem(int fd, const void *key, const void *value, attr.value = ptr_to_u64(value); attr.flags = flags; - return sys_bpf(BPF_MAP_UPDATE_ELEM, &attr, sizeof(attr)); + ret = sys_bpf(BPF_MAP_UPDATE_ELEM, &attr, sizeof(attr)); + return libbpf_err_errno(ret); } int bpf_map_lookup_elem(int fd, const void *key, void *value) { union bpf_attr attr; + int ret; memset(&attr, 0, sizeof(attr)); attr.map_fd = fd; attr.key = ptr_to_u64(key); attr.value = ptr_to_u64(value); - return sys_bpf(BPF_MAP_LOOKUP_ELEM, &attr, sizeof(attr)); + ret = sys_bpf(BPF_MAP_LOOKUP_ELEM, &attr, sizeof(attr)); + return libbpf_err_errno(ret); } int bpf_map_lookup_elem_flags(int fd, const void *key, void *value, __u64 flags) { union bpf_attr attr; + int ret; memset(&attr, 0, sizeof(attr)); attr.map_fd = fd; @@ -443,17 +459,33 @@ int bpf_map_lookup_elem_flags(int fd, const void *key, void *value, __u64 flags) attr.value = ptr_to_u64(value); attr.flags = flags; - return sys_bpf(BPF_MAP_LOOKUP_ELEM, &attr, sizeof(attr)); + ret = sys_bpf(BPF_MAP_LOOKUP_ELEM, &attr, sizeof(attr)); + return libbpf_err_errno(ret); } int bpf_map_lookup_and_delete_elem(int fd, const void *key, void *value) { union bpf_attr attr; + int ret; + + memset(&attr, 0, sizeof(attr)); + attr.map_fd = fd; + attr.key = ptr_to_u64(key); + attr.value = ptr_to_u64(value); + + ret = sys_bpf(BPF_MAP_LOOKUP_AND_DELETE_ELEM, &attr, sizeof(attr)); + return libbpf_err_errno(ret); +} + +int bpf_map_lookup_and_delete_elem_flags(int fd, const void *key, void *value, __u64 flags) +{ + union bpf_attr attr; memset(&attr, 0, sizeof(attr)); attr.map_fd = fd; attr.key = ptr_to_u64(key); attr.value = ptr_to_u64(value); + attr.flags = flags; return sys_bpf(BPF_MAP_LOOKUP_AND_DELETE_ELEM, &attr, sizeof(attr)); } @@ -461,34 +493,40 @@ int bpf_map_lookup_and_delete_elem(int fd, const void *key, void *value) int bpf_map_delete_elem(int fd, const void *key) { union bpf_attr attr; + int ret; memset(&attr, 0, sizeof(attr)); attr.map_fd = fd; attr.key = ptr_to_u64(key); - return sys_bpf(BPF_MAP_DELETE_ELEM, &attr, sizeof(attr)); + ret = sys_bpf(BPF_MAP_DELETE_ELEM, &attr, sizeof(attr)); + return libbpf_err_errno(ret); } int bpf_map_get_next_key(int fd, const void *key, void *next_key) { union bpf_attr attr; + int ret; memset(&attr, 0, sizeof(attr)); attr.map_fd = fd; attr.key = ptr_to_u64(key); attr.next_key = ptr_to_u64(next_key); - return sys_bpf(BPF_MAP_GET_NEXT_KEY, &attr, sizeof(attr)); + ret = sys_bpf(BPF_MAP_GET_NEXT_KEY, &attr, sizeof(attr)); + return libbpf_err_errno(ret); } int bpf_map_freeze(int fd) { union bpf_attr attr; + int ret; memset(&attr, 0, sizeof(attr)); attr.map_fd = fd; - return sys_bpf(BPF_MAP_FREEZE, &attr, sizeof(attr)); + ret = sys_bpf(BPF_MAP_FREEZE, &attr, sizeof(attr)); + return libbpf_err_errno(ret); } static int bpf_map_batch_common(int cmd, int fd, void *in_batch, @@ -500,7 +538,7 @@ static int bpf_map_batch_common(int cmd, int fd, void *in_batch, int ret; if (!OPTS_VALID(opts, bpf_map_batch_opts)) - return -EINVAL; + return libbpf_err(-EINVAL); memset(&attr, 0, sizeof(attr)); attr.batch.map_fd = fd; @@ -515,7 +553,7 @@ static int bpf_map_batch_common(int cmd, int fd, void *in_batch, ret = sys_bpf(cmd, &attr, sizeof(attr)); *count = attr.batch.count; - return ret; + return libbpf_err_errno(ret); } int bpf_map_delete_batch(int fd, void *keys, __u32 *count, @@ -552,22 +590,26 @@ int bpf_map_update_batch(int fd, void *keys, void *values, __u32 *count, int bpf_obj_pin(int fd, const char *pathname) { union bpf_attr attr; + int ret; memset(&attr, 0, sizeof(attr)); attr.pathname = ptr_to_u64((void *)pathname); attr.bpf_fd = fd; - return sys_bpf(BPF_OBJ_PIN, &attr, sizeof(attr)); + ret = sys_bpf(BPF_OBJ_PIN, &attr, sizeof(attr)); + return libbpf_err_errno(ret); } int bpf_obj_get(const char *pathname) { union bpf_attr attr; + int fd; memset(&attr, 0, sizeof(attr)); attr.pathname = ptr_to_u64((void *)pathname); - return sys_bpf(BPF_OBJ_GET, &attr, sizeof(attr)); + fd = sys_bpf(BPF_OBJ_GET, &attr, sizeof(attr)); + return libbpf_err_errno(fd); } int bpf_prog_attach(int prog_fd, int target_fd, enum bpf_attach_type type, @@ -585,9 +627,10 @@ int bpf_prog_attach_xattr(int prog_fd, int target_fd, const struct bpf_prog_attach_opts *opts) { union bpf_attr attr; + int ret; if (!OPTS_VALID(opts, bpf_prog_attach_opts)) - return -EINVAL; + return libbpf_err(-EINVAL); memset(&attr, 0, sizeof(attr)); attr.target_fd = target_fd; @@ -596,30 +639,35 @@ int bpf_prog_attach_xattr(int prog_fd, int target_fd, attr.attach_flags = OPTS_GET(opts, flags, 0); attr.replace_bpf_fd = OPTS_GET(opts, replace_prog_fd, 0); - return sys_bpf(BPF_PROG_ATTACH, &attr, sizeof(attr)); + ret = sys_bpf(BPF_PROG_ATTACH, &attr, sizeof(attr)); + return libbpf_err_errno(ret); } int bpf_prog_detach(int target_fd, enum bpf_attach_type type) { union bpf_attr attr; + int ret; memset(&attr, 0, sizeof(attr)); attr.target_fd = target_fd; attr.attach_type = type; - return sys_bpf(BPF_PROG_DETACH, &attr, sizeof(attr)); + ret = sys_bpf(BPF_PROG_DETACH, &attr, sizeof(attr)); + return libbpf_err_errno(ret); } int bpf_prog_detach2(int prog_fd, int target_fd, enum bpf_attach_type type) { union bpf_attr attr; + int ret; memset(&attr, 0, sizeof(attr)); attr.target_fd = target_fd; attr.attach_bpf_fd = prog_fd; attr.attach_type = type; - return sys_bpf(BPF_PROG_DETACH, &attr, sizeof(attr)); + ret = sys_bpf(BPF_PROG_DETACH, &attr, sizeof(attr)); + return libbpf_err_errno(ret); } int bpf_link_create(int prog_fd, int target_fd, @@ -628,15 +676,16 @@ int bpf_link_create(int prog_fd, int target_fd, { __u32 target_btf_id, iter_info_len; union bpf_attr attr; + int fd; if (!OPTS_VALID(opts, bpf_link_create_opts)) - return -EINVAL; + return libbpf_err(-EINVAL); iter_info_len = OPTS_GET(opts, iter_info_len, 0); target_btf_id = OPTS_GET(opts, target_btf_id, 0); if (iter_info_len && target_btf_id) - return -EINVAL; + return libbpf_err(-EINVAL); memset(&attr, 0, sizeof(attr)); attr.link_create.prog_fd = prog_fd; @@ -652,26 +701,30 @@ int bpf_link_create(int prog_fd, int target_fd, attr.link_create.target_btf_id = target_btf_id; } - return sys_bpf(BPF_LINK_CREATE, &attr, sizeof(attr)); + fd = sys_bpf(BPF_LINK_CREATE, &attr, sizeof(attr)); + return libbpf_err_errno(fd); } int bpf_link_detach(int link_fd) { union bpf_attr attr; + int ret; memset(&attr, 0, sizeof(attr)); attr.link_detach.link_fd = link_fd; - return sys_bpf(BPF_LINK_DETACH, &attr, sizeof(attr)); + ret = sys_bpf(BPF_LINK_DETACH, &attr, sizeof(attr)); + return libbpf_err_errno(ret); } int bpf_link_update(int link_fd, int new_prog_fd, const struct bpf_link_update_opts *opts) { union bpf_attr attr; + int ret; if (!OPTS_VALID(opts, bpf_link_update_opts)) - return -EINVAL; + return libbpf_err(-EINVAL); memset(&attr, 0, sizeof(attr)); attr.link_update.link_fd = link_fd; @@ -679,17 +732,20 @@ int bpf_link_update(int link_fd, int new_prog_fd, attr.link_update.flags = OPTS_GET(opts, flags, 0); attr.link_update.old_prog_fd = OPTS_GET(opts, old_prog_fd, 0); - return sys_bpf(BPF_LINK_UPDATE, &attr, sizeof(attr)); + ret = sys_bpf(BPF_LINK_UPDATE, &attr, sizeof(attr)); + return libbpf_err_errno(ret); } int bpf_iter_create(int link_fd) { union bpf_attr attr; + int fd; memset(&attr, 0, sizeof(attr)); attr.iter_create.link_fd = link_fd; - return sys_bpf(BPF_ITER_CREATE, &attr, sizeof(attr)); + fd = sys_bpf(BPF_ITER_CREATE, &attr, sizeof(attr)); + return libbpf_err_errno(fd); } int bpf_prog_query(int target_fd, enum bpf_attach_type type, __u32 query_flags, @@ -706,10 +762,12 @@ int bpf_prog_query(int target_fd, enum bpf_attach_type type, __u32 query_flags, attr.query.prog_ids = ptr_to_u64(prog_ids); ret = sys_bpf(BPF_PROG_QUERY, &attr, sizeof(attr)); + if (attach_flags) *attach_flags = attr.query.attach_flags; *prog_cnt = attr.query.prog_cnt; - return ret; + + return libbpf_err_errno(ret); } int bpf_prog_test_run(int prog_fd, int repeat, void *data, __u32 size, @@ -727,13 +785,15 @@ int bpf_prog_test_run(int prog_fd, int repeat, void *data, __u32 size, attr.test.repeat = repeat; ret = sys_bpf(BPF_PROG_TEST_RUN, &attr, sizeof(attr)); + if (size_out) *size_out = attr.test.data_size_out; if (retval) *retval = attr.test.retval; if (duration) *duration = attr.test.duration; - return ret; + + return libbpf_err_errno(ret); } int bpf_prog_test_run_xattr(struct bpf_prog_test_run_attr *test_attr) @@ -742,7 +802,7 @@ int bpf_prog_test_run_xattr(struct bpf_prog_test_run_attr *test_attr) int ret; if (!test_attr->data_out && test_attr->data_size_out > 0) - return -EINVAL; + return libbpf_err(-EINVAL); memset(&attr, 0, sizeof(attr)); attr.test.prog_fd = test_attr->prog_fd; @@ -757,11 +817,13 @@ int bpf_prog_test_run_xattr(struct bpf_prog_test_run_attr *test_attr) attr.test.repeat = test_attr->repeat; ret = sys_bpf(BPF_PROG_TEST_RUN, &attr, sizeof(attr)); + test_attr->data_size_out = attr.test.data_size_out; test_attr->ctx_size_out = attr.test.ctx_size_out; test_attr->retval = attr.test.retval; test_attr->duration = attr.test.duration; - return ret; + + return libbpf_err_errno(ret); } int bpf_prog_test_run_opts(int prog_fd, struct bpf_test_run_opts *opts) @@ -770,7 +832,7 @@ int bpf_prog_test_run_opts(int prog_fd, struct bpf_test_run_opts *opts) int ret; if (!OPTS_VALID(opts, bpf_test_run_opts)) - return -EINVAL; + return libbpf_err(-EINVAL); memset(&attr, 0, sizeof(attr)); attr.test.prog_fd = prog_fd; @@ -788,11 +850,13 @@ int bpf_prog_test_run_opts(int prog_fd, struct bpf_test_run_opts *opts) attr.test.data_out = ptr_to_u64(OPTS_GET(opts, data_out, NULL)); ret = sys_bpf(BPF_PROG_TEST_RUN, &attr, sizeof(attr)); + OPTS_SET(opts, data_size_out, attr.test.data_size_out); OPTS_SET(opts, ctx_size_out, attr.test.ctx_size_out); OPTS_SET(opts, duration, attr.test.duration); OPTS_SET(opts, retval, attr.test.retval); - return ret; + + return libbpf_err_errno(ret); } static int bpf_obj_get_next_id(__u32 start_id, __u32 *next_id, int cmd) @@ -807,7 +871,7 @@ static int bpf_obj_get_next_id(__u32 start_id, __u32 *next_id, int cmd) if (!err) *next_id = attr.next_id; - return err; + return libbpf_err_errno(err); } int bpf_prog_get_next_id(__u32 start_id, __u32 *next_id) @@ -833,41 +897,49 @@ int bpf_link_get_next_id(__u32 start_id, __u32 *next_id) int bpf_prog_get_fd_by_id(__u32 id) { union bpf_attr attr; + int fd; memset(&attr, 0, sizeof(attr)); attr.prog_id = id; - return sys_bpf(BPF_PROG_GET_FD_BY_ID, &attr, sizeof(attr)); + fd = sys_bpf(BPF_PROG_GET_FD_BY_ID, &attr, sizeof(attr)); + return libbpf_err_errno(fd); } int bpf_map_get_fd_by_id(__u32 id) { union bpf_attr attr; + int fd; memset(&attr, 0, sizeof(attr)); attr.map_id = id; - return sys_bpf(BPF_MAP_GET_FD_BY_ID, &attr, sizeof(attr)); + fd = sys_bpf(BPF_MAP_GET_FD_BY_ID, &attr, sizeof(attr)); + return libbpf_err_errno(fd); } int bpf_btf_get_fd_by_id(__u32 id) { union bpf_attr attr; + int fd; memset(&attr, 0, sizeof(attr)); attr.btf_id = id; - return sys_bpf(BPF_BTF_GET_FD_BY_ID, &attr, sizeof(attr)); + fd = sys_bpf(BPF_BTF_GET_FD_BY_ID, &attr, sizeof(attr)); + return libbpf_err_errno(fd); } int bpf_link_get_fd_by_id(__u32 id) { union bpf_attr attr; + int fd; memset(&attr, 0, sizeof(attr)); attr.link_id = id; - return sys_bpf(BPF_LINK_GET_FD_BY_ID, &attr, sizeof(attr)); + fd = sys_bpf(BPF_LINK_GET_FD_BY_ID, &attr, sizeof(attr)); + return libbpf_err_errno(fd); } int bpf_obj_get_info_by_fd(int bpf_fd, void *info, __u32 *info_len) @@ -881,21 +953,24 @@ int bpf_obj_get_info_by_fd(int bpf_fd, void *info, __u32 *info_len) attr.info.info = ptr_to_u64(info); err = sys_bpf(BPF_OBJ_GET_INFO_BY_FD, &attr, sizeof(attr)); + if (!err) *info_len = attr.info.info_len; - return err; + return libbpf_err_errno(err); } int bpf_raw_tracepoint_open(const char *name, int prog_fd) { union bpf_attr attr; + int fd; memset(&attr, 0, sizeof(attr)); attr.raw_tracepoint.name = ptr_to_u64(name); attr.raw_tracepoint.prog_fd = prog_fd; - return sys_bpf(BPF_RAW_TRACEPOINT_OPEN, &attr, sizeof(attr)); + fd = sys_bpf(BPF_RAW_TRACEPOINT_OPEN, &attr, sizeof(attr)); + return libbpf_err_errno(fd); } int bpf_load_btf(const void *btf, __u32 btf_size, char *log_buf, __u32 log_buf_size, @@ -915,12 +990,13 @@ retry: } fd = sys_bpf(BPF_BTF_LOAD, &attr, sizeof(attr)); - if (fd == -1 && !do_log && log_buf && log_buf_size) { + + if (fd < 0 && !do_log && log_buf && log_buf_size) { do_log = true; goto retry; } - return fd; + return libbpf_err_errno(fd); } int bpf_task_fd_query(int pid, int fd, __u32 flags, char *buf, __u32 *buf_len, @@ -937,37 +1013,42 @@ int bpf_task_fd_query(int pid, int fd, __u32 flags, char *buf, __u32 *buf_len, attr.task_fd_query.buf_len = *buf_len; err = sys_bpf(BPF_TASK_FD_QUERY, &attr, sizeof(attr)); + *buf_len = attr.task_fd_query.buf_len; *prog_id = attr.task_fd_query.prog_id; *fd_type = attr.task_fd_query.fd_type; *probe_offset = attr.task_fd_query.probe_offset; *probe_addr = attr.task_fd_query.probe_addr; - return err; + return libbpf_err_errno(err); } int bpf_enable_stats(enum bpf_stats_type type) { union bpf_attr attr; + int fd; memset(&attr, 0, sizeof(attr)); attr.enable_stats.type = type; - return sys_bpf(BPF_ENABLE_STATS, &attr, sizeof(attr)); + fd = sys_bpf(BPF_ENABLE_STATS, &attr, sizeof(attr)); + return libbpf_err_errno(fd); } int bpf_prog_bind_map(int prog_fd, int map_fd, const struct bpf_prog_bind_opts *opts) { union bpf_attr attr; + int ret; if (!OPTS_VALID(opts, bpf_prog_bind_opts)) - return -EINVAL; + return libbpf_err(-EINVAL); memset(&attr, 0, sizeof(attr)); attr.prog_bind_map.prog_fd = prog_fd; attr.prog_bind_map.map_fd = map_fd; attr.prog_bind_map.flags = OPTS_GET(opts, flags, 0); - return sys_bpf(BPF_PROG_BIND_MAP, &attr, sizeof(attr)); + ret = sys_bpf(BPF_PROG_BIND_MAP, &attr, sizeof(attr)); + return libbpf_err_errno(ret); } diff --git a/tools/lib/bpf/bpf.h b/tools/lib/bpf/bpf.h index 875dde20d56e..4f758f8f50cd 100644 --- a/tools/lib/bpf/bpf.h +++ b/tools/lib/bpf/bpf.h @@ -124,6 +124,8 @@ LIBBPF_API int bpf_map_lookup_elem_flags(int fd, const void *key, void *value, __u64 flags); LIBBPF_API int bpf_map_lookup_and_delete_elem(int fd, const void *key, void *value); +LIBBPF_API int bpf_map_lookup_and_delete_elem_flags(int fd, const void *key, + void *value, __u64 flags); LIBBPF_API int bpf_map_delete_elem(int fd, const void *key); LIBBPF_API int bpf_map_get_next_key(int fd, const void *key, void *next_key); LIBBPF_API int bpf_map_freeze(int fd); diff --git a/tools/lib/bpf/bpf_gen_internal.h b/tools/lib/bpf/bpf_gen_internal.h new file mode 100644 index 000000000000..615400391e57 --- /dev/null +++ b/tools/lib/bpf/bpf_gen_internal.h @@ -0,0 +1,41 @@ +/* SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) */ +/* Copyright (c) 2021 Facebook */ +#ifndef __BPF_GEN_INTERNAL_H +#define __BPF_GEN_INTERNAL_H + +struct ksym_relo_desc { + const char *name; + int kind; + int insn_idx; +}; + +struct bpf_gen { + struct gen_loader_opts *opts; + void *data_start; + void *data_cur; + void *insn_start; + void *insn_cur; + ssize_t cleanup_label; + __u32 nr_progs; + __u32 nr_maps; + int log_level; + int error; + struct ksym_relo_desc *relos; + int relo_cnt; + char attach_target[128]; + int attach_kind; +}; + +void bpf_gen__init(struct bpf_gen *gen, int log_level); +int bpf_gen__finish(struct bpf_gen *gen); +void bpf_gen__free(struct bpf_gen *gen); +void bpf_gen__load_btf(struct bpf_gen *gen, const void *raw_data, __u32 raw_size); +void bpf_gen__map_create(struct bpf_gen *gen, struct bpf_create_map_attr *map_attr, int map_idx); +struct bpf_prog_load_params; +void bpf_gen__prog_load(struct bpf_gen *gen, struct bpf_prog_load_params *load_attr, int prog_idx); +void bpf_gen__map_update_elem(struct bpf_gen *gen, int map_idx, void *value, __u32 value_size); +void bpf_gen__map_freeze(struct bpf_gen *gen, int map_idx); +void bpf_gen__record_attach_target(struct bpf_gen *gen, const char *name, enum bpf_attach_type type); +void bpf_gen__record_extern(struct bpf_gen *gen, const char *name, int kind, int insn_idx); + +#endif diff --git a/tools/lib/bpf/bpf_helpers.h b/tools/lib/bpf/bpf_helpers.h index 9720dc0b4605..b9987c3efa3c 100644 --- a/tools/lib/bpf/bpf_helpers.h +++ b/tools/lib/bpf/bpf_helpers.h @@ -158,4 +158,70 @@ enum libbpf_tristate { #define __kconfig __attribute__((section(".kconfig"))) #define __ksym __attribute__((section(".ksyms"))) +#ifndef ___bpf_concat +#define ___bpf_concat(a, b) a ## b +#endif +#ifndef ___bpf_apply +#define ___bpf_apply(fn, n) ___bpf_concat(fn, n) +#endif +#ifndef ___bpf_nth +#define ___bpf_nth(_, _1, _2, _3, _4, _5, _6, _7, _8, _9, _a, _b, _c, N, ...) N +#endif +#ifndef ___bpf_narg +#define ___bpf_narg(...) \ + ___bpf_nth(_, ##__VA_ARGS__, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0) +#endif + +#define ___bpf_fill0(arr, p, x) do {} while (0) +#define ___bpf_fill1(arr, p, x) arr[p] = x +#define ___bpf_fill2(arr, p, x, args...) arr[p] = x; ___bpf_fill1(arr, p + 1, args) +#define ___bpf_fill3(arr, p, x, args...) arr[p] = x; ___bpf_fill2(arr, p + 1, args) +#define ___bpf_fill4(arr, p, x, args...) arr[p] = x; ___bpf_fill3(arr, p + 1, args) +#define ___bpf_fill5(arr, p, x, args...) arr[p] = x; ___bpf_fill4(arr, p + 1, args) +#define ___bpf_fill6(arr, p, x, args...) arr[p] = x; ___bpf_fill5(arr, p + 1, args) +#define ___bpf_fill7(arr, p, x, args...) arr[p] = x; ___bpf_fill6(arr, p + 1, args) +#define ___bpf_fill8(arr, p, x, args...) arr[p] = x; ___bpf_fill7(arr, p + 1, args) +#define ___bpf_fill9(arr, p, x, args...) arr[p] = x; ___bpf_fill8(arr, p + 1, args) +#define ___bpf_fill10(arr, p, x, args...) arr[p] = x; ___bpf_fill9(arr, p + 1, args) +#define ___bpf_fill11(arr, p, x, args...) arr[p] = x; ___bpf_fill10(arr, p + 1, args) +#define ___bpf_fill12(arr, p, x, args...) arr[p] = x; ___bpf_fill11(arr, p + 1, args) +#define ___bpf_fill(arr, args...) \ + ___bpf_apply(___bpf_fill, ___bpf_narg(args))(arr, 0, args) + +/* + * BPF_SEQ_PRINTF to wrap bpf_seq_printf to-be-printed values + * in a structure. + */ +#define BPF_SEQ_PRINTF(seq, fmt, args...) \ +({ \ + static const char ___fmt[] = fmt; \ + unsigned long long ___param[___bpf_narg(args)]; \ + \ + _Pragma("GCC diagnostic push") \ + _Pragma("GCC diagnostic ignored \"-Wint-conversion\"") \ + ___bpf_fill(___param, args); \ + _Pragma("GCC diagnostic pop") \ + \ + bpf_seq_printf(seq, ___fmt, sizeof(___fmt), \ + ___param, sizeof(___param)); \ +}) + +/* + * BPF_SNPRINTF wraps the bpf_snprintf helper with variadic arguments instead of + * an array of u64. + */ +#define BPF_SNPRINTF(out, out_size, fmt, args...) \ +({ \ + static const char ___fmt[] = fmt; \ + unsigned long long ___param[___bpf_narg(args)]; \ + \ + _Pragma("GCC diagnostic push") \ + _Pragma("GCC diagnostic ignored \"-Wint-conversion\"") \ + ___bpf_fill(___param, args); \ + _Pragma("GCC diagnostic pop") \ + \ + bpf_snprintf(out, out_size, ___fmt, \ + ___param, sizeof(___param)); \ +}) + #endif diff --git a/tools/lib/bpf/bpf_prog_linfo.c b/tools/lib/bpf/bpf_prog_linfo.c index 3ed1a27b5f7c..5c503096ef43 100644 --- a/tools/lib/bpf/bpf_prog_linfo.c +++ b/tools/lib/bpf/bpf_prog_linfo.c @@ -106,7 +106,7 @@ struct bpf_prog_linfo *bpf_prog_linfo__new(const struct bpf_prog_info *info) nr_linfo = info->nr_line_info; if (!nr_linfo) - return NULL; + return errno = EINVAL, NULL; /* * The min size that bpf_prog_linfo has to access for @@ -114,11 +114,11 @@ struct bpf_prog_linfo *bpf_prog_linfo__new(const struct bpf_prog_info *info) */ if (info->line_info_rec_size < offsetof(struct bpf_line_info, file_name_off)) - return NULL; + return errno = EINVAL, NULL; prog_linfo = calloc(1, sizeof(*prog_linfo)); if (!prog_linfo) - return NULL; + return errno = ENOMEM, NULL; /* Copy xlated line_info */ prog_linfo->nr_linfo = nr_linfo; @@ -174,7 +174,7 @@ struct bpf_prog_linfo *bpf_prog_linfo__new(const struct bpf_prog_info *info) err_free: bpf_prog_linfo__free(prog_linfo); - return NULL; + return errno = EINVAL, NULL; } const struct bpf_line_info * @@ -186,11 +186,11 @@ bpf_prog_linfo__lfind_addr_func(const struct bpf_prog_linfo *prog_linfo, const __u64 *jited_linfo; if (func_idx >= prog_linfo->nr_jited_func) - return NULL; + return errno = ENOENT, NULL; nr_linfo = prog_linfo->nr_jited_linfo_per_func[func_idx]; if (nr_skip >= nr_linfo) - return NULL; + return errno = ENOENT, NULL; start = prog_linfo->jited_linfo_func_idx[func_idx] + nr_skip; jited_rec_size = prog_linfo->jited_rec_size; @@ -198,7 +198,7 @@ bpf_prog_linfo__lfind_addr_func(const struct bpf_prog_linfo *prog_linfo, (start * jited_rec_size); jited_linfo = raw_jited_linfo; if (addr < *jited_linfo) - return NULL; + return errno = ENOENT, NULL; nr_linfo -= nr_skip; rec_size = prog_linfo->rec_size; @@ -225,13 +225,13 @@ bpf_prog_linfo__lfind(const struct bpf_prog_linfo *prog_linfo, nr_linfo = prog_linfo->nr_linfo; if (nr_skip >= nr_linfo) - return NULL; + return errno = ENOENT, NULL; rec_size = prog_linfo->rec_size; raw_linfo = prog_linfo->raw_linfo + (nr_skip * rec_size); linfo = raw_linfo; if (insn_off < linfo->insn_off) - return NULL; + return errno = ENOENT, NULL; nr_linfo -= nr_skip; for (i = 0; i < nr_linfo; i++) { diff --git a/tools/lib/bpf/bpf_tracing.h b/tools/lib/bpf/bpf_tracing.h index 8c954ebc0c7c..d6bfbe009296 100644 --- a/tools/lib/bpf/bpf_tracing.h +++ b/tools/lib/bpf/bpf_tracing.h @@ -25,26 +25,35 @@ #define bpf_target_sparc #define bpf_target_defined #else - #undef bpf_target_defined -#endif /* Fall back to what the compiler says */ -#ifndef bpf_target_defined #if defined(__x86_64__) #define bpf_target_x86 + #define bpf_target_defined #elif defined(__s390__) #define bpf_target_s390 + #define bpf_target_defined #elif defined(__arm__) #define bpf_target_arm + #define bpf_target_defined #elif defined(__aarch64__) #define bpf_target_arm64 + #define bpf_target_defined #elif defined(__mips__) #define bpf_target_mips + #define bpf_target_defined #elif defined(__powerpc__) #define bpf_target_powerpc + #define bpf_target_defined #elif defined(__sparc__) #define bpf_target_sparc + #define bpf_target_defined +#endif /* no compiler target */ + #endif + +#ifndef __BPF_TARGET_MISSING +#define __BPF_TARGET_MISSING "GCC error \"Must specify a BPF target arch via __TARGET_ARCH_xxx\"" #endif #if defined(bpf_target_x86) @@ -287,7 +296,7 @@ struct pt_regs; #elif defined(bpf_target_sparc) #define BPF_KPROBE_READ_RET_IP(ip, ctx) ({ (ip) = PT_REGS_RET(ctx); }) #define BPF_KRETPROBE_READ_RET_IP BPF_KPROBE_READ_RET_IP -#else +#elif defined(bpf_target_defined) #define BPF_KPROBE_READ_RET_IP(ip, ctx) \ ({ bpf_probe_read_kernel(&(ip), sizeof(ip), (void *)PT_REGS_RET(ctx)); }) #define BPF_KRETPROBE_READ_RET_IP(ip, ctx) \ @@ -295,13 +304,48 @@ struct pt_regs; (void *)(PT_REGS_FP(ctx) + sizeof(ip))); }) #endif +#if !defined(bpf_target_defined) + +#define PT_REGS_PARM1(x) ({ _Pragma(__BPF_TARGET_MISSING); 0l; }) +#define PT_REGS_PARM2(x) ({ _Pragma(__BPF_TARGET_MISSING); 0l; }) +#define PT_REGS_PARM3(x) ({ _Pragma(__BPF_TARGET_MISSING); 0l; }) +#define PT_REGS_PARM4(x) ({ _Pragma(__BPF_TARGET_MISSING); 0l; }) +#define PT_REGS_PARM5(x) ({ _Pragma(__BPF_TARGET_MISSING); 0l; }) +#define PT_REGS_RET(x) ({ _Pragma(__BPF_TARGET_MISSING); 0l; }) +#define PT_REGS_FP(x) ({ _Pragma(__BPF_TARGET_MISSING); 0l; }) +#define PT_REGS_RC(x) ({ _Pragma(__BPF_TARGET_MISSING); 0l; }) +#define PT_REGS_SP(x) ({ _Pragma(__BPF_TARGET_MISSING); 0l; }) +#define PT_REGS_IP(x) ({ _Pragma(__BPF_TARGET_MISSING); 0l; }) + +#define PT_REGS_PARM1_CORE(x) ({ _Pragma(__BPF_TARGET_MISSING); 0l; }) +#define PT_REGS_PARM2_CORE(x) ({ _Pragma(__BPF_TARGET_MISSING); 0l; }) +#define PT_REGS_PARM3_CORE(x) ({ _Pragma(__BPF_TARGET_MISSING); 0l; }) +#define PT_REGS_PARM4_CORE(x) ({ _Pragma(__BPF_TARGET_MISSING); 0l; }) +#define PT_REGS_PARM5_CORE(x) ({ _Pragma(__BPF_TARGET_MISSING); 0l; }) +#define PT_REGS_RET_CORE(x) ({ _Pragma(__BPF_TARGET_MISSING); 0l; }) +#define PT_REGS_FP_CORE(x) ({ _Pragma(__BPF_TARGET_MISSING); 0l; }) +#define PT_REGS_RC_CORE(x) ({ _Pragma(__BPF_TARGET_MISSING); 0l; }) +#define PT_REGS_SP_CORE(x) ({ _Pragma(__BPF_TARGET_MISSING); 0l; }) +#define PT_REGS_IP_CORE(x) ({ _Pragma(__BPF_TARGET_MISSING); 0l; }) + +#define BPF_KPROBE_READ_RET_IP(ip, ctx) ({ _Pragma(__BPF_TARGET_MISSING); 0l; }) +#define BPF_KRETPROBE_READ_RET_IP(ip, ctx) ({ _Pragma(__BPF_TARGET_MISSING); 0l; }) + +#endif /* !defined(bpf_target_defined) */ + +#ifndef ___bpf_concat #define ___bpf_concat(a, b) a ## b +#endif +#ifndef ___bpf_apply #define ___bpf_apply(fn, n) ___bpf_concat(fn, n) +#endif +#ifndef ___bpf_nth #define ___bpf_nth(_, _1, _2, _3, _4, _5, _6, _7, _8, _9, _a, _b, _c, N, ...) N +#endif +#ifndef ___bpf_narg #define ___bpf_narg(...) \ ___bpf_nth(_, ##__VA_ARGS__, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0) -#define ___bpf_empty(...) \ - ___bpf_nth(_, ##__VA_ARGS__, N, N, N, N, N, N, N, N, N, N, 0) +#endif #define ___bpf_ctx_cast0() ctx #define ___bpf_ctx_cast1(x) ___bpf_ctx_cast0(), (void *)ctx[0] @@ -413,56 +457,4 @@ typeof(name(0)) name(struct pt_regs *ctx) \ } \ static __always_inline typeof(name(0)) ____##name(struct pt_regs *ctx, ##args) -#define ___bpf_fill0(arr, p, x) do {} while (0) -#define ___bpf_fill1(arr, p, x) arr[p] = x -#define ___bpf_fill2(arr, p, x, args...) arr[p] = x; ___bpf_fill1(arr, p + 1, args) -#define ___bpf_fill3(arr, p, x, args...) arr[p] = x; ___bpf_fill2(arr, p + 1, args) -#define ___bpf_fill4(arr, p, x, args...) arr[p] = x; ___bpf_fill3(arr, p + 1, args) -#define ___bpf_fill5(arr, p, x, args...) arr[p] = x; ___bpf_fill4(arr, p + 1, args) -#define ___bpf_fill6(arr, p, x, args...) arr[p] = x; ___bpf_fill5(arr, p + 1, args) -#define ___bpf_fill7(arr, p, x, args...) arr[p] = x; ___bpf_fill6(arr, p + 1, args) -#define ___bpf_fill8(arr, p, x, args...) arr[p] = x; ___bpf_fill7(arr, p + 1, args) -#define ___bpf_fill9(arr, p, x, args...) arr[p] = x; ___bpf_fill8(arr, p + 1, args) -#define ___bpf_fill10(arr, p, x, args...) arr[p] = x; ___bpf_fill9(arr, p + 1, args) -#define ___bpf_fill11(arr, p, x, args...) arr[p] = x; ___bpf_fill10(arr, p + 1, args) -#define ___bpf_fill12(arr, p, x, args...) arr[p] = x; ___bpf_fill11(arr, p + 1, args) -#define ___bpf_fill(arr, args...) \ - ___bpf_apply(___bpf_fill, ___bpf_narg(args))(arr, 0, args) - -/* - * BPF_SEQ_PRINTF to wrap bpf_seq_printf to-be-printed values - * in a structure. - */ -#define BPF_SEQ_PRINTF(seq, fmt, args...) \ -({ \ - static const char ___fmt[] = fmt; \ - unsigned long long ___param[___bpf_narg(args)]; \ - \ - _Pragma("GCC diagnostic push") \ - _Pragma("GCC diagnostic ignored \"-Wint-conversion\"") \ - ___bpf_fill(___param, args); \ - _Pragma("GCC diagnostic pop") \ - \ - bpf_seq_printf(seq, ___fmt, sizeof(___fmt), \ - ___param, sizeof(___param)); \ -}) - -/* - * BPF_SNPRINTF wraps the bpf_snprintf helper with variadic arguments instead of - * an array of u64. - */ -#define BPF_SNPRINTF(out, out_size, fmt, args...) \ -({ \ - static const char ___fmt[] = fmt; \ - unsigned long long ___param[___bpf_narg(args)]; \ - \ - _Pragma("GCC diagnostic push") \ - _Pragma("GCC diagnostic ignored \"-Wint-conversion\"") \ - ___bpf_fill(___param, args); \ - _Pragma("GCC diagnostic pop") \ - \ - bpf_snprintf(out, out_size, ___fmt, \ - ___param, sizeof(___param)); \ -}) - #endif diff --git a/tools/lib/bpf/btf.c b/tools/lib/bpf/btf.c index d57e13a13798..b46760b93bb4 100644 --- a/tools/lib/bpf/btf.c +++ b/tools/lib/bpf/btf.c @@ -443,7 +443,7 @@ struct btf_type *btf_type_by_id(struct btf *btf, __u32 type_id) const struct btf_type *btf__type_by_id(const struct btf *btf, __u32 type_id) { if (type_id >= btf->start_id + btf->nr_types) - return NULL; + return errno = EINVAL, NULL; return btf_type_by_id((struct btf *)btf, type_id); } @@ -510,7 +510,7 @@ size_t btf__pointer_size(const struct btf *btf) int btf__set_pointer_size(struct btf *btf, size_t ptr_sz) { if (ptr_sz != 4 && ptr_sz != 8) - return -EINVAL; + return libbpf_err(-EINVAL); btf->ptr_sz = ptr_sz; return 0; } @@ -537,7 +537,7 @@ enum btf_endianness btf__endianness(const struct btf *btf) int btf__set_endianness(struct btf *btf, enum btf_endianness endian) { if (endian != BTF_LITTLE_ENDIAN && endian != BTF_BIG_ENDIAN) - return -EINVAL; + return libbpf_err(-EINVAL); btf->swapped_endian = is_host_big_endian() != (endian == BTF_BIG_ENDIAN); if (!btf->swapped_endian) { @@ -568,8 +568,7 @@ __s64 btf__resolve_size(const struct btf *btf, __u32 type_id) int i; t = btf__type_by_id(btf, type_id); - for (i = 0; i < MAX_RESOLVE_DEPTH && !btf_type_is_void_or_null(t); - i++) { + for (i = 0; i < MAX_RESOLVE_DEPTH && !btf_type_is_void_or_null(t); i++) { switch (btf_kind(t)) { case BTF_KIND_INT: case BTF_KIND_STRUCT: @@ -592,12 +591,12 @@ __s64 btf__resolve_size(const struct btf *btf, __u32 type_id) case BTF_KIND_ARRAY: array = btf_array(t); if (nelems && array->nelems > UINT32_MAX / nelems) - return -E2BIG; + return libbpf_err(-E2BIG); nelems *= array->nelems; type_id = array->type; break; default: - return -EINVAL; + return libbpf_err(-EINVAL); } t = btf__type_by_id(btf, type_id); @@ -605,9 +604,9 @@ __s64 btf__resolve_size(const struct btf *btf, __u32 type_id) done: if (size < 0) - return -EINVAL; + return libbpf_err(-EINVAL); if (nelems && size > UINT32_MAX / nelems) - return -E2BIG; + return libbpf_err(-E2BIG); return nelems * size; } @@ -640,7 +639,7 @@ int btf__align_of(const struct btf *btf, __u32 id) for (i = 0; i < vlen; i++, m++) { align = btf__align_of(btf, m->type); if (align <= 0) - return align; + return libbpf_err(align); max_align = max(max_align, align); } @@ -648,7 +647,7 @@ int btf__align_of(const struct btf *btf, __u32 id) } default: pr_warn("unsupported BTF_KIND:%u\n", btf_kind(t)); - return 0; + return errno = EINVAL, 0; } } @@ -667,7 +666,7 @@ int btf__resolve_type(const struct btf *btf, __u32 type_id) } if (depth == MAX_RESOLVE_DEPTH || btf_type_is_void_or_null(t)) - return -EINVAL; + return libbpf_err(-EINVAL); return type_id; } @@ -687,7 +686,7 @@ __s32 btf__find_by_name(const struct btf *btf, const char *type_name) return i; } - return -ENOENT; + return libbpf_err(-ENOENT); } __s32 btf__find_by_name_kind(const struct btf *btf, const char *type_name, @@ -709,7 +708,7 @@ __s32 btf__find_by_name_kind(const struct btf *btf, const char *type_name, return i; } - return -ENOENT; + return libbpf_err(-ENOENT); } static bool btf_is_modifiable(const struct btf *btf) @@ -785,12 +784,12 @@ static struct btf *btf_new_empty(struct btf *base_btf) struct btf *btf__new_empty(void) { - return btf_new_empty(NULL); + return libbpf_ptr(btf_new_empty(NULL)); } struct btf *btf__new_empty_split(struct btf *base_btf) { - return btf_new_empty(base_btf); + return libbpf_ptr(btf_new_empty(base_btf)); } static struct btf *btf_new(const void *data, __u32 size, struct btf *base_btf) @@ -846,7 +845,7 @@ done: struct btf *btf__new(const void *data, __u32 size) { - return btf_new(data, size, NULL); + return libbpf_ptr(btf_new(data, size, NULL)); } static struct btf *btf_parse_elf(const char *path, struct btf *base_btf, @@ -937,7 +936,8 @@ static struct btf *btf_parse_elf(const char *path, struct btf *base_btf, goto done; } btf = btf_new(btf_data->d_buf, btf_data->d_size, base_btf); - if (IS_ERR(btf)) + err = libbpf_get_error(btf); + if (err) goto done; switch (gelf_getclass(elf)) { @@ -953,9 +953,9 @@ static struct btf *btf_parse_elf(const char *path, struct btf *base_btf, } if (btf_ext && btf_ext_data) { - *btf_ext = btf_ext__new(btf_ext_data->d_buf, - btf_ext_data->d_size); - if (IS_ERR(*btf_ext)) + *btf_ext = btf_ext__new(btf_ext_data->d_buf, btf_ext_data->d_size); + err = libbpf_get_error(*btf_ext); + if (err) goto done; } else if (btf_ext) { *btf_ext = NULL; @@ -965,30 +965,24 @@ done: elf_end(elf); close(fd); - if (err) - return ERR_PTR(err); - /* - * btf is always parsed before btf_ext, so no need to clean up - * btf_ext, if btf loading failed - */ - if (IS_ERR(btf)) + if (!err) return btf; - if (btf_ext && IS_ERR(*btf_ext)) { - btf__free(btf); - err = PTR_ERR(*btf_ext); - return ERR_PTR(err); - } - return btf; + + if (btf_ext) + btf_ext__free(*btf_ext); + btf__free(btf); + + return ERR_PTR(err); } struct btf *btf__parse_elf(const char *path, struct btf_ext **btf_ext) { - return btf_parse_elf(path, NULL, btf_ext); + return libbpf_ptr(btf_parse_elf(path, NULL, btf_ext)); } struct btf *btf__parse_elf_split(const char *path, struct btf *base_btf) { - return btf_parse_elf(path, base_btf, NULL); + return libbpf_ptr(btf_parse_elf(path, base_btf, NULL)); } static struct btf *btf_parse_raw(const char *path, struct btf *base_btf) @@ -1056,36 +1050,39 @@ err_out: struct btf *btf__parse_raw(const char *path) { - return btf_parse_raw(path, NULL); + return libbpf_ptr(btf_parse_raw(path, NULL)); } struct btf *btf__parse_raw_split(const char *path, struct btf *base_btf) { - return btf_parse_raw(path, base_btf); + return libbpf_ptr(btf_parse_raw(path, base_btf)); } static struct btf *btf_parse(const char *path, struct btf *base_btf, struct btf_ext **btf_ext) { struct btf *btf; + int err; if (btf_ext) *btf_ext = NULL; btf = btf_parse_raw(path, base_btf); - if (!IS_ERR(btf) || PTR_ERR(btf) != -EPROTO) + err = libbpf_get_error(btf); + if (!err) return btf; - + if (err != -EPROTO) + return ERR_PTR(err); return btf_parse_elf(path, base_btf, btf_ext); } struct btf *btf__parse(const char *path, struct btf_ext **btf_ext) { - return btf_parse(path, NULL, btf_ext); + return libbpf_ptr(btf_parse(path, NULL, btf_ext)); } struct btf *btf__parse_split(const char *path, struct btf *base_btf) { - return btf_parse(path, base_btf, NULL); + return libbpf_ptr(btf_parse(path, base_btf, NULL)); } static int compare_vsi_off(const void *_a, const void *_b) @@ -1178,7 +1175,7 @@ int btf__finalize_data(struct bpf_object *obj, struct btf *btf) } } - return err; + return libbpf_err(err); } static void *btf_get_raw_data(const struct btf *btf, __u32 *size, bool swap_endian); @@ -1191,13 +1188,13 @@ int btf__load(struct btf *btf) int err = 0; if (btf->fd >= 0) - return -EEXIST; + return libbpf_err(-EEXIST); retry_load: if (log_buf_size) { log_buf = malloc(log_buf_size); if (!log_buf) - return -ENOMEM; + return libbpf_err(-ENOMEM); *log_buf = 0; } @@ -1229,7 +1226,7 @@ retry_load: done: free(log_buf); - return err; + return libbpf_err(err); } int btf__fd(const struct btf *btf) @@ -1305,7 +1302,7 @@ const void *btf__get_raw_data(const struct btf *btf_ro, __u32 *size) data = btf_get_raw_data(btf, &data_sz, btf->swapped_endian); if (!data) - return NULL; + return errno = -ENOMEM, NULL; btf->raw_size = data_sz; if (btf->swapped_endian) @@ -1323,7 +1320,7 @@ const char *btf__str_by_offset(const struct btf *btf, __u32 offset) else if (offset - btf->start_str_off < btf->hdr->str_len) return btf_strs_data(btf) + (offset - btf->start_str_off); else - return NULL; + return errno = EINVAL, NULL; } const char *btf__name_by_offset(const struct btf *btf, __u32 offset) @@ -1388,17 +1385,20 @@ exit_free: int btf__get_from_id(__u32 id, struct btf **btf) { struct btf *res; - int btf_fd; + int err, btf_fd; *btf = NULL; btf_fd = bpf_btf_get_fd_by_id(id); if (btf_fd < 0) - return -errno; + return libbpf_err(-errno); res = btf_get_from_fd(btf_fd, NULL); + err = libbpf_get_error(res); + close(btf_fd); - if (IS_ERR(res)) - return PTR_ERR(res); + + if (err) + return libbpf_err(err); *btf = res; return 0; @@ -1415,31 +1415,30 @@ int btf__get_map_kv_tids(const struct btf *btf, const char *map_name, __s64 key_size, value_size; __s32 container_id; - if (snprintf(container_name, max_name, "____btf_map_%s", map_name) == - max_name) { + if (snprintf(container_name, max_name, "____btf_map_%s", map_name) == max_name) { pr_warn("map:%s length of '____btf_map_%s' is too long\n", map_name, map_name); - return -EINVAL; + return libbpf_err(-EINVAL); } container_id = btf__find_by_name(btf, container_name); if (container_id < 0) { pr_debug("map:%s container_name:%s cannot be found in BTF. Missing BPF_ANNOTATE_KV_PAIR?\n", map_name, container_name); - return container_id; + return libbpf_err(container_id); } container_type = btf__type_by_id(btf, container_id); if (!container_type) { pr_warn("map:%s cannot find BTF type for container_id:%u\n", map_name, container_id); - return -EINVAL; + return libbpf_err(-EINVAL); } if (!btf_is_struct(container_type) || btf_vlen(container_type) < 2) { pr_warn("map:%s container_name:%s is an invalid container struct\n", map_name, container_name); - return -EINVAL; + return libbpf_err(-EINVAL); } key = btf_members(container_type); @@ -1448,25 +1447,25 @@ int btf__get_map_kv_tids(const struct btf *btf, const char *map_name, key_size = btf__resolve_size(btf, key->type); if (key_size < 0) { pr_warn("map:%s invalid BTF key_type_size\n", map_name); - return key_size; + return libbpf_err(key_size); } if (expected_key_size != key_size) { pr_warn("map:%s btf_key_type_size:%u != map_def_key_size:%u\n", map_name, (__u32)key_size, expected_key_size); - return -EINVAL; + return libbpf_err(-EINVAL); } value_size = btf__resolve_size(btf, value->type); if (value_size < 0) { pr_warn("map:%s invalid BTF value_type_size\n", map_name); - return value_size; + return libbpf_err(value_size); } if (expected_value_size != value_size) { pr_warn("map:%s btf_value_type_size:%u != map_def_value_size:%u\n", map_name, (__u32)value_size, expected_value_size); - return -EINVAL; + return libbpf_err(-EINVAL); } *key_type_id = key->type; @@ -1563,11 +1562,11 @@ int btf__find_str(struct btf *btf, const char *s) /* BTF needs to be in a modifiable state to build string lookup index */ if (btf_ensure_modifiable(btf)) - return -ENOMEM; + return libbpf_err(-ENOMEM); off = strset__find_str(btf->strs_set, s); if (off < 0) - return off; + return libbpf_err(off); return btf->start_str_off + off; } @@ -1588,11 +1587,11 @@ int btf__add_str(struct btf *btf, const char *s) } if (btf_ensure_modifiable(btf)) - return -ENOMEM; + return libbpf_err(-ENOMEM); off = strset__add_str(btf->strs_set, s); if (off < 0) - return off; + return libbpf_err(off); btf->hdr->str_len = strset__data_size(btf->strs_set); @@ -1616,7 +1615,7 @@ static int btf_commit_type(struct btf *btf, int data_sz) err = btf_add_type_idx_entry(btf, btf->hdr->type_len); if (err) - return err; + return libbpf_err(err); btf->hdr->type_len += data_sz; btf->hdr->str_off += data_sz; @@ -1653,21 +1652,21 @@ int btf__add_type(struct btf *btf, const struct btf *src_btf, const struct btf_t sz = btf_type_size(src_type); if (sz < 0) - return sz; + return libbpf_err(sz); /* deconstruct BTF, if necessary, and invalidate raw_data */ if (btf_ensure_modifiable(btf)) - return -ENOMEM; + return libbpf_err(-ENOMEM); t = btf_add_type_mem(btf, sz); if (!t) - return -ENOMEM; + return libbpf_err(-ENOMEM); memcpy(t, src_type, sz); err = btf_type_visit_str_offs(t, btf_rewrite_str, &p); if (err) - return err; + return libbpf_err(err); return btf_commit_type(btf, sz); } @@ -1688,21 +1687,21 @@ int btf__add_int(struct btf *btf, const char *name, size_t byte_sz, int encoding /* non-empty name */ if (!name || !name[0]) - return -EINVAL; + return libbpf_err(-EINVAL); /* byte_sz must be power of 2 */ if (!byte_sz || (byte_sz & (byte_sz - 1)) || byte_sz > 16) - return -EINVAL; + return libbpf_err(-EINVAL); if (encoding & ~(BTF_INT_SIGNED | BTF_INT_CHAR | BTF_INT_BOOL)) - return -EINVAL; + return libbpf_err(-EINVAL); /* deconstruct BTF, if necessary, and invalidate raw_data */ if (btf_ensure_modifiable(btf)) - return -ENOMEM; + return libbpf_err(-ENOMEM); sz = sizeof(struct btf_type) + sizeof(int); t = btf_add_type_mem(btf, sz); if (!t) - return -ENOMEM; + return libbpf_err(-ENOMEM); /* if something goes wrong later, we might end up with an extra string, * but that shouldn't be a problem, because BTF can't be constructed @@ -1736,20 +1735,20 @@ int btf__add_float(struct btf *btf, const char *name, size_t byte_sz) /* non-empty name */ if (!name || !name[0]) - return -EINVAL; + return libbpf_err(-EINVAL); /* byte_sz must be one of the explicitly allowed values */ if (byte_sz != 2 && byte_sz != 4 && byte_sz != 8 && byte_sz != 12 && byte_sz != 16) - return -EINVAL; + return libbpf_err(-EINVAL); if (btf_ensure_modifiable(btf)) - return -ENOMEM; + return libbpf_err(-ENOMEM); sz = sizeof(struct btf_type); t = btf_add_type_mem(btf, sz); if (!t) - return -ENOMEM; + return libbpf_err(-ENOMEM); name_off = btf__add_str(btf, name); if (name_off < 0) @@ -1780,15 +1779,15 @@ static int btf_add_ref_kind(struct btf *btf, int kind, const char *name, int ref int sz, name_off = 0; if (validate_type_id(ref_type_id)) - return -EINVAL; + return libbpf_err(-EINVAL); if (btf_ensure_modifiable(btf)) - return -ENOMEM; + return libbpf_err(-ENOMEM); sz = sizeof(struct btf_type); t = btf_add_type_mem(btf, sz); if (!t) - return -ENOMEM; + return libbpf_err(-ENOMEM); if (name && name[0]) { name_off = btf__add_str(btf, name); @@ -1831,15 +1830,15 @@ int btf__add_array(struct btf *btf, int index_type_id, int elem_type_id, __u32 n int sz; if (validate_type_id(index_type_id) || validate_type_id(elem_type_id)) - return -EINVAL; + return libbpf_err(-EINVAL); if (btf_ensure_modifiable(btf)) - return -ENOMEM; + return libbpf_err(-ENOMEM); sz = sizeof(struct btf_type) + sizeof(struct btf_array); t = btf_add_type_mem(btf, sz); if (!t) - return -ENOMEM; + return libbpf_err(-ENOMEM); t->name_off = 0; t->info = btf_type_info(BTF_KIND_ARRAY, 0, 0); @@ -1860,12 +1859,12 @@ static int btf_add_composite(struct btf *btf, int kind, const char *name, __u32 int sz, name_off = 0; if (btf_ensure_modifiable(btf)) - return -ENOMEM; + return libbpf_err(-ENOMEM); sz = sizeof(struct btf_type); t = btf_add_type_mem(btf, sz); if (!t) - return -ENOMEM; + return libbpf_err(-ENOMEM); if (name && name[0]) { name_off = btf__add_str(btf, name); @@ -1943,30 +1942,30 @@ int btf__add_field(struct btf *btf, const char *name, int type_id, /* last type should be union/struct */ if (btf->nr_types == 0) - return -EINVAL; + return libbpf_err(-EINVAL); t = btf_last_type(btf); if (!btf_is_composite(t)) - return -EINVAL; + return libbpf_err(-EINVAL); if (validate_type_id(type_id)) - return -EINVAL; + return libbpf_err(-EINVAL); /* best-effort bit field offset/size enforcement */ is_bitfield = bit_size || (bit_offset % 8 != 0); if (is_bitfield && (bit_size == 0 || bit_size > 255 || bit_offset > 0xffffff)) - return -EINVAL; + return libbpf_err(-EINVAL); /* only offset 0 is allowed for unions */ if (btf_is_union(t) && bit_offset) - return -EINVAL; + return libbpf_err(-EINVAL); /* decompose and invalidate raw data */ if (btf_ensure_modifiable(btf)) - return -ENOMEM; + return libbpf_err(-ENOMEM); sz = sizeof(struct btf_member); m = btf_add_type_mem(btf, sz); if (!m) - return -ENOMEM; + return libbpf_err(-ENOMEM); if (name && name[0]) { name_off = btf__add_str(btf, name); @@ -2008,15 +2007,15 @@ int btf__add_enum(struct btf *btf, const char *name, __u32 byte_sz) /* byte_sz must be power of 2 */ if (!byte_sz || (byte_sz & (byte_sz - 1)) || byte_sz > 8) - return -EINVAL; + return libbpf_err(-EINVAL); if (btf_ensure_modifiable(btf)) - return -ENOMEM; + return libbpf_err(-ENOMEM); sz = sizeof(struct btf_type); t = btf_add_type_mem(btf, sz); if (!t) - return -ENOMEM; + return libbpf_err(-ENOMEM); if (name && name[0]) { name_off = btf__add_str(btf, name); @@ -2048,25 +2047,25 @@ int btf__add_enum_value(struct btf *btf, const char *name, __s64 value) /* last type should be BTF_KIND_ENUM */ if (btf->nr_types == 0) - return -EINVAL; + return libbpf_err(-EINVAL); t = btf_last_type(btf); if (!btf_is_enum(t)) - return -EINVAL; + return libbpf_err(-EINVAL); /* non-empty name */ if (!name || !name[0]) - return -EINVAL; + return libbpf_err(-EINVAL); if (value < INT_MIN || value > UINT_MAX) - return -E2BIG; + return libbpf_err(-E2BIG); /* decompose and invalidate raw data */ if (btf_ensure_modifiable(btf)) - return -ENOMEM; + return libbpf_err(-ENOMEM); sz = sizeof(struct btf_enum); v = btf_add_type_mem(btf, sz); if (!v) - return -ENOMEM; + return libbpf_err(-ENOMEM); name_off = btf__add_str(btf, name); if (name_off < 0) @@ -2096,7 +2095,7 @@ int btf__add_enum_value(struct btf *btf, const char *name, __s64 value) int btf__add_fwd(struct btf *btf, const char *name, enum btf_fwd_kind fwd_kind) { if (!name || !name[0]) - return -EINVAL; + return libbpf_err(-EINVAL); switch (fwd_kind) { case BTF_FWD_STRUCT: @@ -2117,7 +2116,7 @@ int btf__add_fwd(struct btf *btf, const char *name, enum btf_fwd_kind fwd_kind) */ return btf__add_enum(btf, name, sizeof(int)); default: - return -EINVAL; + return libbpf_err(-EINVAL); } } @@ -2132,7 +2131,7 @@ int btf__add_fwd(struct btf *btf, const char *name, enum btf_fwd_kind fwd_kind) int btf__add_typedef(struct btf *btf, const char *name, int ref_type_id) { if (!name || !name[0]) - return -EINVAL; + return libbpf_err(-EINVAL); return btf_add_ref_kind(btf, BTF_KIND_TYPEDEF, name, ref_type_id); } @@ -2187,10 +2186,10 @@ int btf__add_func(struct btf *btf, const char *name, int id; if (!name || !name[0]) - return -EINVAL; + return libbpf_err(-EINVAL); if (linkage != BTF_FUNC_STATIC && linkage != BTF_FUNC_GLOBAL && linkage != BTF_FUNC_EXTERN) - return -EINVAL; + return libbpf_err(-EINVAL); id = btf_add_ref_kind(btf, BTF_KIND_FUNC, name, proto_type_id); if (id > 0) { @@ -2198,7 +2197,7 @@ int btf__add_func(struct btf *btf, const char *name, t->info = btf_type_info(BTF_KIND_FUNC, linkage, 0); } - return id; + return libbpf_err(id); } /* @@ -2219,15 +2218,15 @@ int btf__add_func_proto(struct btf *btf, int ret_type_id) int sz; if (validate_type_id(ret_type_id)) - return -EINVAL; + return libbpf_err(-EINVAL); if (btf_ensure_modifiable(btf)) - return -ENOMEM; + return libbpf_err(-ENOMEM); sz = sizeof(struct btf_type); t = btf_add_type_mem(btf, sz); if (!t) - return -ENOMEM; + return libbpf_err(-ENOMEM); /* start out with vlen=0; this will be adjusted when adding enum * values, if necessary @@ -2254,23 +2253,23 @@ int btf__add_func_param(struct btf *btf, const char *name, int type_id) int sz, name_off = 0; if (validate_type_id(type_id)) - return -EINVAL; + return libbpf_err(-EINVAL); /* last type should be BTF_KIND_FUNC_PROTO */ if (btf->nr_types == 0) - return -EINVAL; + return libbpf_err(-EINVAL); t = btf_last_type(btf); if (!btf_is_func_proto(t)) - return -EINVAL; + return libbpf_err(-EINVAL); /* decompose and invalidate raw data */ if (btf_ensure_modifiable(btf)) - return -ENOMEM; + return libbpf_err(-ENOMEM); sz = sizeof(struct btf_param); p = btf_add_type_mem(btf, sz); if (!p) - return -ENOMEM; + return libbpf_err(-ENOMEM); if (name && name[0]) { name_off = btf__add_str(btf, name); @@ -2308,21 +2307,21 @@ int btf__add_var(struct btf *btf, const char *name, int linkage, int type_id) /* non-empty name */ if (!name || !name[0]) - return -EINVAL; + return libbpf_err(-EINVAL); if (linkage != BTF_VAR_STATIC && linkage != BTF_VAR_GLOBAL_ALLOCATED && linkage != BTF_VAR_GLOBAL_EXTERN) - return -EINVAL; + return libbpf_err(-EINVAL); if (validate_type_id(type_id)) - return -EINVAL; + return libbpf_err(-EINVAL); /* deconstruct BTF, if necessary, and invalidate raw_data */ if (btf_ensure_modifiable(btf)) - return -ENOMEM; + return libbpf_err(-ENOMEM); sz = sizeof(struct btf_type) + sizeof(struct btf_var); t = btf_add_type_mem(btf, sz); if (!t) - return -ENOMEM; + return libbpf_err(-ENOMEM); name_off = btf__add_str(btf, name); if (name_off < 0) @@ -2357,15 +2356,15 @@ int btf__add_datasec(struct btf *btf, const char *name, __u32 byte_sz) /* non-empty name */ if (!name || !name[0]) - return -EINVAL; + return libbpf_err(-EINVAL); if (btf_ensure_modifiable(btf)) - return -ENOMEM; + return libbpf_err(-ENOMEM); sz = sizeof(struct btf_type); t = btf_add_type_mem(btf, sz); if (!t) - return -ENOMEM; + return libbpf_err(-ENOMEM); name_off = btf__add_str(btf, name); if (name_off < 0) @@ -2397,22 +2396,22 @@ int btf__add_datasec_var_info(struct btf *btf, int var_type_id, __u32 offset, __ /* last type should be BTF_KIND_DATASEC */ if (btf->nr_types == 0) - return -EINVAL; + return libbpf_err(-EINVAL); t = btf_last_type(btf); if (!btf_is_datasec(t)) - return -EINVAL; + return libbpf_err(-EINVAL); if (validate_type_id(var_type_id)) - return -EINVAL; + return libbpf_err(-EINVAL); /* decompose and invalidate raw data */ if (btf_ensure_modifiable(btf)) - return -ENOMEM; + return libbpf_err(-ENOMEM); sz = sizeof(struct btf_var_secinfo); v = btf_add_type_mem(btf, sz); if (!v) - return -ENOMEM; + return libbpf_err(-ENOMEM); v->type = var_type_id; v->offset = offset; @@ -2614,11 +2613,11 @@ struct btf_ext *btf_ext__new(__u8 *data, __u32 size) err = btf_ext_parse_hdr(data, size); if (err) - return ERR_PTR(err); + return libbpf_err_ptr(err); btf_ext = calloc(1, sizeof(struct btf_ext)); if (!btf_ext) - return ERR_PTR(-ENOMEM); + return libbpf_err_ptr(-ENOMEM); btf_ext->data_size = size; btf_ext->data = malloc(size); @@ -2628,9 +2627,11 @@ struct btf_ext *btf_ext__new(__u8 *data, __u32 size) } memcpy(btf_ext->data, data, size); - if (btf_ext->hdr->hdr_len < - offsetofend(struct btf_ext_header, line_info_len)) + if (btf_ext->hdr->hdr_len < offsetofend(struct btf_ext_header, line_info_len)) { + err = -EINVAL; goto done; + } + err = btf_ext_setup_func_info(btf_ext); if (err) goto done; @@ -2639,8 +2640,11 @@ struct btf_ext *btf_ext__new(__u8 *data, __u32 size) if (err) goto done; - if (btf_ext->hdr->hdr_len < offsetofend(struct btf_ext_header, core_relo_len)) + if (btf_ext->hdr->hdr_len < offsetofend(struct btf_ext_header, core_relo_len)) { + err = -EINVAL; goto done; + } + err = btf_ext_setup_core_relos(btf_ext); if (err) goto done; @@ -2648,7 +2652,7 @@ struct btf_ext *btf_ext__new(__u8 *data, __u32 size) done: if (err) { btf_ext__free(btf_ext); - return ERR_PTR(err); + return libbpf_err_ptr(err); } return btf_ext; @@ -2687,7 +2691,7 @@ static int btf_ext_reloc_info(const struct btf *btf, existing_len = (*cnt) * record_size; data = realloc(*info, existing_len + records_len); if (!data) - return -ENOMEM; + return libbpf_err(-ENOMEM); memcpy(data + existing_len, sinfo->data, records_len); /* adjust insn_off only, the rest data will be passed @@ -2697,15 +2701,14 @@ static int btf_ext_reloc_info(const struct btf *btf, __u32 *insn_off; insn_off = data + existing_len + (i * record_size); - *insn_off = *insn_off / sizeof(struct bpf_insn) + - insns_cnt; + *insn_off = *insn_off / sizeof(struct bpf_insn) + insns_cnt; } *info = data; *cnt += sinfo->num_info; return 0; } - return -ENOENT; + return libbpf_err(-ENOENT); } int btf_ext__reloc_func_info(const struct btf *btf, @@ -2894,11 +2897,11 @@ int btf__dedup(struct btf *btf, struct btf_ext *btf_ext, if (IS_ERR(d)) { pr_debug("btf_dedup_new failed: %ld", PTR_ERR(d)); - return -EINVAL; + return libbpf_err(-EINVAL); } if (btf_ensure_modifiable(btf)) - return -ENOMEM; + return libbpf_err(-ENOMEM); err = btf_dedup_prep(d); if (err) { @@ -2938,7 +2941,7 @@ int btf__dedup(struct btf *btf, struct btf_ext *btf_ext, done: btf_dedup_free(d); - return err; + return libbpf_err(err); } #define BTF_UNPROCESSED_ID ((__u32)-1) @@ -4411,7 +4414,7 @@ struct btf *libbpf_find_kernel_btf(void) char path[PATH_MAX + 1]; struct utsname buf; struct btf *btf; - int i; + int i, err; uname(&buf); @@ -4425,17 +4428,16 @@ struct btf *libbpf_find_kernel_btf(void) btf = btf__parse_raw(path); else btf = btf__parse_elf(path, NULL); - - pr_debug("loading kernel BTF '%s': %ld\n", - path, IS_ERR(btf) ? PTR_ERR(btf) : 0); - if (IS_ERR(btf)) + err = libbpf_get_error(btf); + pr_debug("loading kernel BTF '%s': %d\n", path, err); + if (err) continue; return btf; } pr_warn("failed to find valid kernel BTF\n"); - return ERR_PTR(-ESRCH); + return libbpf_err_ptr(-ESRCH); } int btf_type_visit_type_ids(struct btf_type *t, type_id_visit_fn visit, void *ctx) diff --git a/tools/lib/bpf/btf_dump.c b/tools/lib/bpf/btf_dump.c index 5e2809d685bf..5dc6b5172bb3 100644 --- a/tools/lib/bpf/btf_dump.c +++ b/tools/lib/bpf/btf_dump.c @@ -128,7 +128,7 @@ struct btf_dump *btf_dump__new(const struct btf *btf, d = calloc(1, sizeof(struct btf_dump)); if (!d) - return ERR_PTR(-ENOMEM); + return libbpf_err_ptr(-ENOMEM); d->btf = btf; d->btf_ext = btf_ext; @@ -156,7 +156,7 @@ struct btf_dump *btf_dump__new(const struct btf *btf, return d; err: btf_dump__free(d); - return ERR_PTR(err); + return libbpf_err_ptr(err); } static int btf_dump_resize(struct btf_dump *d) @@ -236,16 +236,16 @@ int btf_dump__dump_type(struct btf_dump *d, __u32 id) int err, i; if (id > btf__get_nr_types(d->btf)) - return -EINVAL; + return libbpf_err(-EINVAL); err = btf_dump_resize(d); if (err) - return err; + return libbpf_err(err); d->emit_queue_cnt = 0; err = btf_dump_order_type(d, id, false); if (err < 0) - return err; + return libbpf_err(err); for (i = 0; i < d->emit_queue_cnt; i++) btf_dump_emit_type(d, d->emit_queue[i], 0 /*top-level*/); @@ -1075,11 +1075,11 @@ int btf_dump__emit_type_decl(struct btf_dump *d, __u32 id, int lvl, err; if (!OPTS_VALID(opts, btf_dump_emit_type_decl_opts)) - return -EINVAL; + return libbpf_err(-EINVAL); err = btf_dump_resize(d); if (err) - return -EINVAL; + return libbpf_err(err); fname = OPTS_GET(opts, field_name, ""); lvl = OPTS_GET(opts, indent_level, 0); diff --git a/tools/lib/bpf/gen_loader.c b/tools/lib/bpf/gen_loader.c new file mode 100644 index 000000000000..8df718a6b142 --- /dev/null +++ b/tools/lib/bpf/gen_loader.c @@ -0,0 +1,729 @@ +// SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) +/* Copyright (c) 2021 Facebook */ +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <errno.h> +#include <linux/filter.h> +#include "btf.h" +#include "bpf.h" +#include "libbpf.h" +#include "libbpf_internal.h" +#include "hashmap.h" +#include "bpf_gen_internal.h" +#include "skel_internal.h" + +#define MAX_USED_MAPS 64 +#define MAX_USED_PROGS 32 + +/* The following structure describes the stack layout of the loader program. + * In addition R6 contains the pointer to context. + * R7 contains the result of the last sys_bpf command (typically error or FD). + * R9 contains the result of the last sys_close command. + * + * Naming convention: + * ctx - bpf program context + * stack - bpf program stack + * blob - bpf_attr-s, strings, insns, map data. + * All the bytes that loader prog will use for read/write. + */ +struct loader_stack { + __u32 btf_fd; + __u32 map_fd[MAX_USED_MAPS]; + __u32 prog_fd[MAX_USED_PROGS]; + __u32 inner_map_fd; +}; + +#define stack_off(field) \ + (__s16)(-sizeof(struct loader_stack) + offsetof(struct loader_stack, field)) + +#define attr_field(attr, field) (attr + offsetof(union bpf_attr, field)) + +static int realloc_insn_buf(struct bpf_gen *gen, __u32 size) +{ + size_t off = gen->insn_cur - gen->insn_start; + void *insn_start; + + if (gen->error) + return gen->error; + if (size > INT32_MAX || off + size > INT32_MAX) { + gen->error = -ERANGE; + return -ERANGE; + } + insn_start = realloc(gen->insn_start, off + size); + if (!insn_start) { + gen->error = -ENOMEM; + free(gen->insn_start); + gen->insn_start = NULL; + return -ENOMEM; + } + gen->insn_start = insn_start; + gen->insn_cur = insn_start + off; + return 0; +} + +static int realloc_data_buf(struct bpf_gen *gen, __u32 size) +{ + size_t off = gen->data_cur - gen->data_start; + void *data_start; + + if (gen->error) + return gen->error; + if (size > INT32_MAX || off + size > INT32_MAX) { + gen->error = -ERANGE; + return -ERANGE; + } + data_start = realloc(gen->data_start, off + size); + if (!data_start) { + gen->error = -ENOMEM; + free(gen->data_start); + gen->data_start = NULL; + return -ENOMEM; + } + gen->data_start = data_start; + gen->data_cur = data_start + off; + return 0; +} + +static void emit(struct bpf_gen *gen, struct bpf_insn insn) +{ + if (realloc_insn_buf(gen, sizeof(insn))) + return; + memcpy(gen->insn_cur, &insn, sizeof(insn)); + gen->insn_cur += sizeof(insn); +} + +static void emit2(struct bpf_gen *gen, struct bpf_insn insn1, struct bpf_insn insn2) +{ + emit(gen, insn1); + emit(gen, insn2); +} + +void bpf_gen__init(struct bpf_gen *gen, int log_level) +{ + size_t stack_sz = sizeof(struct loader_stack); + int i; + + gen->log_level = log_level; + /* save ctx pointer into R6 */ + emit(gen, BPF_MOV64_REG(BPF_REG_6, BPF_REG_1)); + + /* bzero stack */ + emit(gen, BPF_MOV64_REG(BPF_REG_1, BPF_REG_10)); + emit(gen, BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -stack_sz)); + emit(gen, BPF_MOV64_IMM(BPF_REG_2, stack_sz)); + emit(gen, BPF_MOV64_IMM(BPF_REG_3, 0)); + emit(gen, BPF_EMIT_CALL(BPF_FUNC_probe_read_kernel)); + + /* jump over cleanup code */ + emit(gen, BPF_JMP_IMM(BPF_JA, 0, 0, + /* size of cleanup code below */ + (stack_sz / 4) * 3 + 2)); + + /* remember the label where all error branches will jump to */ + gen->cleanup_label = gen->insn_cur - gen->insn_start; + /* emit cleanup code: close all temp FDs */ + for (i = 0; i < stack_sz; i += 4) { + emit(gen, BPF_LDX_MEM(BPF_W, BPF_REG_1, BPF_REG_10, -stack_sz + i)); + emit(gen, BPF_JMP_IMM(BPF_JSLE, BPF_REG_1, 0, 1)); + emit(gen, BPF_EMIT_CALL(BPF_FUNC_sys_close)); + } + /* R7 contains the error code from sys_bpf. Copy it into R0 and exit. */ + emit(gen, BPF_MOV64_REG(BPF_REG_0, BPF_REG_7)); + emit(gen, BPF_EXIT_INSN()); +} + +static int add_data(struct bpf_gen *gen, const void *data, __u32 size) +{ + void *prev; + + if (realloc_data_buf(gen, size)) + return 0; + prev = gen->data_cur; + memcpy(gen->data_cur, data, size); + gen->data_cur += size; + return prev - gen->data_start; +} + +static int insn_bytes_to_bpf_size(__u32 sz) +{ + switch (sz) { + case 8: return BPF_DW; + case 4: return BPF_W; + case 2: return BPF_H; + case 1: return BPF_B; + default: return -1; + } +} + +/* *(u64 *)(blob + off) = (u64)(void *)(blob + data) */ +static void emit_rel_store(struct bpf_gen *gen, int off, int data) +{ + emit2(gen, BPF_LD_IMM64_RAW_FULL(BPF_REG_0, BPF_PSEUDO_MAP_IDX_VALUE, + 0, 0, 0, data)); + emit2(gen, BPF_LD_IMM64_RAW_FULL(BPF_REG_1, BPF_PSEUDO_MAP_IDX_VALUE, + 0, 0, 0, off)); + emit(gen, BPF_STX_MEM(BPF_DW, BPF_REG_1, BPF_REG_0, 0)); +} + +/* *(u64 *)(blob + off) = (u64)(void *)(%sp + stack_off) */ +static void emit_rel_store_sp(struct bpf_gen *gen, int off, int stack_off) +{ + emit(gen, BPF_MOV64_REG(BPF_REG_0, BPF_REG_10)); + emit(gen, BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, stack_off)); + emit2(gen, BPF_LD_IMM64_RAW_FULL(BPF_REG_1, BPF_PSEUDO_MAP_IDX_VALUE, + 0, 0, 0, off)); + emit(gen, BPF_STX_MEM(BPF_DW, BPF_REG_1, BPF_REG_0, 0)); +} + +static void move_ctx2blob(struct bpf_gen *gen, int off, int size, int ctx_off, + bool check_non_zero) +{ + emit(gen, BPF_LDX_MEM(insn_bytes_to_bpf_size(size), BPF_REG_0, BPF_REG_6, ctx_off)); + if (check_non_zero) + /* If value in ctx is zero don't update the blob. + * For example: when ctx->map.max_entries == 0, keep default max_entries from bpf.c + */ + emit(gen, BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 3)); + emit2(gen, BPF_LD_IMM64_RAW_FULL(BPF_REG_1, BPF_PSEUDO_MAP_IDX_VALUE, + 0, 0, 0, off)); + emit(gen, BPF_STX_MEM(insn_bytes_to_bpf_size(size), BPF_REG_1, BPF_REG_0, 0)); +} + +static void move_stack2blob(struct bpf_gen *gen, int off, int size, int stack_off) +{ + emit(gen, BPF_LDX_MEM(insn_bytes_to_bpf_size(size), BPF_REG_0, BPF_REG_10, stack_off)); + emit2(gen, BPF_LD_IMM64_RAW_FULL(BPF_REG_1, BPF_PSEUDO_MAP_IDX_VALUE, + 0, 0, 0, off)); + emit(gen, BPF_STX_MEM(insn_bytes_to_bpf_size(size), BPF_REG_1, BPF_REG_0, 0)); +} + +static void move_stack2ctx(struct bpf_gen *gen, int ctx_off, int size, int stack_off) +{ + emit(gen, BPF_LDX_MEM(insn_bytes_to_bpf_size(size), BPF_REG_0, BPF_REG_10, stack_off)); + emit(gen, BPF_STX_MEM(insn_bytes_to_bpf_size(size), BPF_REG_6, BPF_REG_0, ctx_off)); +} + +static void emit_sys_bpf(struct bpf_gen *gen, int cmd, int attr, int attr_size) +{ + emit(gen, BPF_MOV64_IMM(BPF_REG_1, cmd)); + emit2(gen, BPF_LD_IMM64_RAW_FULL(BPF_REG_2, BPF_PSEUDO_MAP_IDX_VALUE, + 0, 0, 0, attr)); + emit(gen, BPF_MOV64_IMM(BPF_REG_3, attr_size)); + emit(gen, BPF_EMIT_CALL(BPF_FUNC_sys_bpf)); + /* remember the result in R7 */ + emit(gen, BPF_MOV64_REG(BPF_REG_7, BPF_REG_0)); +} + +static bool is_simm16(__s64 value) +{ + return value == (__s64)(__s16)value; +} + +static void emit_check_err(struct bpf_gen *gen) +{ + __s64 off = -(gen->insn_cur - gen->insn_start - gen->cleanup_label) / 8 - 1; + + /* R7 contains result of last sys_bpf command. + * if (R7 < 0) goto cleanup; + */ + if (is_simm16(off)) { + emit(gen, BPF_JMP_IMM(BPF_JSLT, BPF_REG_7, 0, off)); + } else { + gen->error = -ERANGE; + emit(gen, BPF_JMP_IMM(BPF_JA, 0, 0, -1)); + } +} + +/* reg1 and reg2 should not be R1 - R5. They can be R0, R6 - R10 */ +static void emit_debug(struct bpf_gen *gen, int reg1, int reg2, + const char *fmt, va_list args) +{ + char buf[1024]; + int addr, len, ret; + + if (!gen->log_level) + return; + ret = vsnprintf(buf, sizeof(buf), fmt, args); + if (ret < 1024 - 7 && reg1 >= 0 && reg2 < 0) + /* The special case to accommodate common debug_ret(): + * to avoid specifying BPF_REG_7 and adding " r=%%d" to + * prints explicitly. + */ + strcat(buf, " r=%d"); + len = strlen(buf) + 1; + addr = add_data(gen, buf, len); + + emit2(gen, BPF_LD_IMM64_RAW_FULL(BPF_REG_1, BPF_PSEUDO_MAP_IDX_VALUE, + 0, 0, 0, addr)); + emit(gen, BPF_MOV64_IMM(BPF_REG_2, len)); + if (reg1 >= 0) + emit(gen, BPF_MOV64_REG(BPF_REG_3, reg1)); + if (reg2 >= 0) + emit(gen, BPF_MOV64_REG(BPF_REG_4, reg2)); + emit(gen, BPF_EMIT_CALL(BPF_FUNC_trace_printk)); +} + +static void debug_regs(struct bpf_gen *gen, int reg1, int reg2, const char *fmt, ...) +{ + va_list args; + + va_start(args, fmt); + emit_debug(gen, reg1, reg2, fmt, args); + va_end(args); +} + +static void debug_ret(struct bpf_gen *gen, const char *fmt, ...) +{ + va_list args; + + va_start(args, fmt); + emit_debug(gen, BPF_REG_7, -1, fmt, args); + va_end(args); +} + +static void __emit_sys_close(struct bpf_gen *gen) +{ + emit(gen, BPF_JMP_IMM(BPF_JSLE, BPF_REG_1, 0, + /* 2 is the number of the following insns + * * 6 is additional insns in debug_regs + */ + 2 + (gen->log_level ? 6 : 0))); + emit(gen, BPF_MOV64_REG(BPF_REG_9, BPF_REG_1)); + emit(gen, BPF_EMIT_CALL(BPF_FUNC_sys_close)); + debug_regs(gen, BPF_REG_9, BPF_REG_0, "close(%%d) = %%d"); +} + +static void emit_sys_close_stack(struct bpf_gen *gen, int stack_off) +{ + emit(gen, BPF_LDX_MEM(BPF_W, BPF_REG_1, BPF_REG_10, stack_off)); + __emit_sys_close(gen); +} + +static void emit_sys_close_blob(struct bpf_gen *gen, int blob_off) +{ + emit2(gen, BPF_LD_IMM64_RAW_FULL(BPF_REG_0, BPF_PSEUDO_MAP_IDX_VALUE, + 0, 0, 0, blob_off)); + emit(gen, BPF_LDX_MEM(BPF_W, BPF_REG_1, BPF_REG_0, 0)); + __emit_sys_close(gen); +} + +int bpf_gen__finish(struct bpf_gen *gen) +{ + int i; + + emit_sys_close_stack(gen, stack_off(btf_fd)); + for (i = 0; i < gen->nr_progs; i++) + move_stack2ctx(gen, + sizeof(struct bpf_loader_ctx) + + sizeof(struct bpf_map_desc) * gen->nr_maps + + sizeof(struct bpf_prog_desc) * i + + offsetof(struct bpf_prog_desc, prog_fd), 4, + stack_off(prog_fd[i])); + for (i = 0; i < gen->nr_maps; i++) + move_stack2ctx(gen, + sizeof(struct bpf_loader_ctx) + + sizeof(struct bpf_map_desc) * i + + offsetof(struct bpf_map_desc, map_fd), 4, + stack_off(map_fd[i])); + emit(gen, BPF_MOV64_IMM(BPF_REG_0, 0)); + emit(gen, BPF_EXIT_INSN()); + pr_debug("gen: finish %d\n", gen->error); + if (!gen->error) { + struct gen_loader_opts *opts = gen->opts; + + opts->insns = gen->insn_start; + opts->insns_sz = gen->insn_cur - gen->insn_start; + opts->data = gen->data_start; + opts->data_sz = gen->data_cur - gen->data_start; + } + return gen->error; +} + +void bpf_gen__free(struct bpf_gen *gen) +{ + if (!gen) + return; + free(gen->data_start); + free(gen->insn_start); + free(gen); +} + +void bpf_gen__load_btf(struct bpf_gen *gen, const void *btf_raw_data, + __u32 btf_raw_size) +{ + int attr_size = offsetofend(union bpf_attr, btf_log_level); + int btf_data, btf_load_attr; + union bpf_attr attr; + + memset(&attr, 0, attr_size); + pr_debug("gen: load_btf: size %d\n", btf_raw_size); + btf_data = add_data(gen, btf_raw_data, btf_raw_size); + + attr.btf_size = btf_raw_size; + btf_load_attr = add_data(gen, &attr, attr_size); + + /* populate union bpf_attr with user provided log details */ + move_ctx2blob(gen, attr_field(btf_load_attr, btf_log_level), 4, + offsetof(struct bpf_loader_ctx, log_level), false); + move_ctx2blob(gen, attr_field(btf_load_attr, btf_log_size), 4, + offsetof(struct bpf_loader_ctx, log_size), false); + move_ctx2blob(gen, attr_field(btf_load_attr, btf_log_buf), 8, + offsetof(struct bpf_loader_ctx, log_buf), false); + /* populate union bpf_attr with a pointer to the BTF data */ + emit_rel_store(gen, attr_field(btf_load_attr, btf), btf_data); + /* emit BTF_LOAD command */ + emit_sys_bpf(gen, BPF_BTF_LOAD, btf_load_attr, attr_size); + debug_ret(gen, "btf_load size %d", btf_raw_size); + emit_check_err(gen); + /* remember btf_fd in the stack, if successful */ + emit(gen, BPF_STX_MEM(BPF_W, BPF_REG_10, BPF_REG_7, stack_off(btf_fd))); +} + +void bpf_gen__map_create(struct bpf_gen *gen, + struct bpf_create_map_attr *map_attr, int map_idx) +{ + int attr_size = offsetofend(union bpf_attr, btf_vmlinux_value_type_id); + bool close_inner_map_fd = false; + int map_create_attr; + union bpf_attr attr; + + memset(&attr, 0, attr_size); + attr.map_type = map_attr->map_type; + attr.key_size = map_attr->key_size; + attr.value_size = map_attr->value_size; + attr.map_flags = map_attr->map_flags; + memcpy(attr.map_name, map_attr->name, + min((unsigned)strlen(map_attr->name), BPF_OBJ_NAME_LEN - 1)); + attr.numa_node = map_attr->numa_node; + attr.map_ifindex = map_attr->map_ifindex; + attr.max_entries = map_attr->max_entries; + switch (attr.map_type) { + case BPF_MAP_TYPE_PERF_EVENT_ARRAY: + case BPF_MAP_TYPE_CGROUP_ARRAY: + case BPF_MAP_TYPE_STACK_TRACE: + case BPF_MAP_TYPE_ARRAY_OF_MAPS: + case BPF_MAP_TYPE_HASH_OF_MAPS: + case BPF_MAP_TYPE_DEVMAP: + case BPF_MAP_TYPE_DEVMAP_HASH: + case BPF_MAP_TYPE_CPUMAP: + case BPF_MAP_TYPE_XSKMAP: + case BPF_MAP_TYPE_SOCKMAP: + case BPF_MAP_TYPE_SOCKHASH: + case BPF_MAP_TYPE_QUEUE: + case BPF_MAP_TYPE_STACK: + case BPF_MAP_TYPE_RINGBUF: + break; + default: + attr.btf_key_type_id = map_attr->btf_key_type_id; + attr.btf_value_type_id = map_attr->btf_value_type_id; + } + + pr_debug("gen: map_create: %s idx %d type %d value_type_id %d\n", + attr.map_name, map_idx, map_attr->map_type, attr.btf_value_type_id); + + map_create_attr = add_data(gen, &attr, attr_size); + if (attr.btf_value_type_id) + /* populate union bpf_attr with btf_fd saved in the stack earlier */ + move_stack2blob(gen, attr_field(map_create_attr, btf_fd), 4, + stack_off(btf_fd)); + switch (attr.map_type) { + case BPF_MAP_TYPE_ARRAY_OF_MAPS: + case BPF_MAP_TYPE_HASH_OF_MAPS: + move_stack2blob(gen, attr_field(map_create_attr, inner_map_fd), 4, + stack_off(inner_map_fd)); + close_inner_map_fd = true; + break; + default: + break; + } + /* conditionally update max_entries */ + if (map_idx >= 0) + move_ctx2blob(gen, attr_field(map_create_attr, max_entries), 4, + sizeof(struct bpf_loader_ctx) + + sizeof(struct bpf_map_desc) * map_idx + + offsetof(struct bpf_map_desc, max_entries), + true /* check that max_entries != 0 */); + /* emit MAP_CREATE command */ + emit_sys_bpf(gen, BPF_MAP_CREATE, map_create_attr, attr_size); + debug_ret(gen, "map_create %s idx %d type %d value_size %d value_btf_id %d", + attr.map_name, map_idx, map_attr->map_type, attr.value_size, + attr.btf_value_type_id); + emit_check_err(gen); + /* remember map_fd in the stack, if successful */ + if (map_idx < 0) { + /* This bpf_gen__map_create() function is called with map_idx >= 0 + * for all maps that libbpf loading logic tracks. + * It's called with -1 to create an inner map. + */ + emit(gen, BPF_STX_MEM(BPF_W, BPF_REG_10, BPF_REG_7, + stack_off(inner_map_fd))); + } else if (map_idx != gen->nr_maps) { + gen->error = -EDOM; /* internal bug */ + return; + } else { + emit(gen, BPF_STX_MEM(BPF_W, BPF_REG_10, BPF_REG_7, + stack_off(map_fd[map_idx]))); + gen->nr_maps++; + } + if (close_inner_map_fd) + emit_sys_close_stack(gen, stack_off(inner_map_fd)); +} + +void bpf_gen__record_attach_target(struct bpf_gen *gen, const char *attach_name, + enum bpf_attach_type type) +{ + const char *prefix; + int kind, ret; + + btf_get_kernel_prefix_kind(type, &prefix, &kind); + gen->attach_kind = kind; + ret = snprintf(gen->attach_target, sizeof(gen->attach_target), "%s%s", + prefix, attach_name); + if (ret == sizeof(gen->attach_target)) + gen->error = -ENOSPC; +} + +static void emit_find_attach_target(struct bpf_gen *gen) +{ + int name, len = strlen(gen->attach_target) + 1; + + pr_debug("gen: find_attach_tgt %s %d\n", gen->attach_target, gen->attach_kind); + name = add_data(gen, gen->attach_target, len); + + emit2(gen, BPF_LD_IMM64_RAW_FULL(BPF_REG_1, BPF_PSEUDO_MAP_IDX_VALUE, + 0, 0, 0, name)); + emit(gen, BPF_MOV64_IMM(BPF_REG_2, len)); + emit(gen, BPF_MOV64_IMM(BPF_REG_3, gen->attach_kind)); + emit(gen, BPF_MOV64_IMM(BPF_REG_4, 0)); + emit(gen, BPF_EMIT_CALL(BPF_FUNC_btf_find_by_name_kind)); + emit(gen, BPF_MOV64_REG(BPF_REG_7, BPF_REG_0)); + debug_ret(gen, "find_by_name_kind(%s,%d)", + gen->attach_target, gen->attach_kind); + emit_check_err(gen); + /* if successful, btf_id is in lower 32-bit of R7 and + * btf_obj_fd is in upper 32-bit + */ +} + +void bpf_gen__record_extern(struct bpf_gen *gen, const char *name, int kind, + int insn_idx) +{ + struct ksym_relo_desc *relo; + + relo = libbpf_reallocarray(gen->relos, gen->relo_cnt + 1, sizeof(*relo)); + if (!relo) { + gen->error = -ENOMEM; + return; + } + gen->relos = relo; + relo += gen->relo_cnt; + relo->name = name; + relo->kind = kind; + relo->insn_idx = insn_idx; + gen->relo_cnt++; +} + +static void emit_relo(struct bpf_gen *gen, struct ksym_relo_desc *relo, int insns) +{ + int name, insn, len = strlen(relo->name) + 1; + + pr_debug("gen: emit_relo: %s at %d\n", relo->name, relo->insn_idx); + name = add_data(gen, relo->name, len); + + emit2(gen, BPF_LD_IMM64_RAW_FULL(BPF_REG_1, BPF_PSEUDO_MAP_IDX_VALUE, + 0, 0, 0, name)); + emit(gen, BPF_MOV64_IMM(BPF_REG_2, len)); + emit(gen, BPF_MOV64_IMM(BPF_REG_3, relo->kind)); + emit(gen, BPF_MOV64_IMM(BPF_REG_4, 0)); + emit(gen, BPF_EMIT_CALL(BPF_FUNC_btf_find_by_name_kind)); + emit(gen, BPF_MOV64_REG(BPF_REG_7, BPF_REG_0)); + debug_ret(gen, "find_by_name_kind(%s,%d)", relo->name, relo->kind); + emit_check_err(gen); + /* store btf_id into insn[insn_idx].imm */ + insn = insns + sizeof(struct bpf_insn) * relo->insn_idx + + offsetof(struct bpf_insn, imm); + emit2(gen, BPF_LD_IMM64_RAW_FULL(BPF_REG_0, BPF_PSEUDO_MAP_IDX_VALUE, + 0, 0, 0, insn)); + emit(gen, BPF_STX_MEM(BPF_W, BPF_REG_0, BPF_REG_7, 0)); + if (relo->kind == BTF_KIND_VAR) { + /* store btf_obj_fd into insn[insn_idx + 1].imm */ + emit(gen, BPF_ALU64_IMM(BPF_RSH, BPF_REG_7, 32)); + emit(gen, BPF_STX_MEM(BPF_W, BPF_REG_0, BPF_REG_7, + sizeof(struct bpf_insn))); + } +} + +static void emit_relos(struct bpf_gen *gen, int insns) +{ + int i; + + for (i = 0; i < gen->relo_cnt; i++) + emit_relo(gen, gen->relos + i, insns); +} + +static void cleanup_relos(struct bpf_gen *gen, int insns) +{ + int i, insn; + + for (i = 0; i < gen->relo_cnt; i++) { + if (gen->relos[i].kind != BTF_KIND_VAR) + continue; + /* close fd recorded in insn[insn_idx + 1].imm */ + insn = insns + + sizeof(struct bpf_insn) * (gen->relos[i].insn_idx + 1) + + offsetof(struct bpf_insn, imm); + emit_sys_close_blob(gen, insn); + } + if (gen->relo_cnt) { + free(gen->relos); + gen->relo_cnt = 0; + gen->relos = NULL; + } +} + +void bpf_gen__prog_load(struct bpf_gen *gen, + struct bpf_prog_load_params *load_attr, int prog_idx) +{ + int attr_size = offsetofend(union bpf_attr, fd_array); + int prog_load_attr, license, insns, func_info, line_info; + union bpf_attr attr; + + memset(&attr, 0, attr_size); + pr_debug("gen: prog_load: type %d insns_cnt %zd\n", + load_attr->prog_type, load_attr->insn_cnt); + /* add license string to blob of bytes */ + license = add_data(gen, load_attr->license, strlen(load_attr->license) + 1); + /* add insns to blob of bytes */ + insns = add_data(gen, load_attr->insns, + load_attr->insn_cnt * sizeof(struct bpf_insn)); + + attr.prog_type = load_attr->prog_type; + attr.expected_attach_type = load_attr->expected_attach_type; + attr.attach_btf_id = load_attr->attach_btf_id; + attr.prog_ifindex = load_attr->prog_ifindex; + attr.kern_version = 0; + attr.insn_cnt = (__u32)load_attr->insn_cnt; + attr.prog_flags = load_attr->prog_flags; + + attr.func_info_rec_size = load_attr->func_info_rec_size; + attr.func_info_cnt = load_attr->func_info_cnt; + func_info = add_data(gen, load_attr->func_info, + attr.func_info_cnt * attr.func_info_rec_size); + + attr.line_info_rec_size = load_attr->line_info_rec_size; + attr.line_info_cnt = load_attr->line_info_cnt; + line_info = add_data(gen, load_attr->line_info, + attr.line_info_cnt * attr.line_info_rec_size); + + memcpy(attr.prog_name, load_attr->name, + min((unsigned)strlen(load_attr->name), BPF_OBJ_NAME_LEN - 1)); + prog_load_attr = add_data(gen, &attr, attr_size); + + /* populate union bpf_attr with a pointer to license */ + emit_rel_store(gen, attr_field(prog_load_attr, license), license); + + /* populate union bpf_attr with a pointer to instructions */ + emit_rel_store(gen, attr_field(prog_load_attr, insns), insns); + + /* populate union bpf_attr with a pointer to func_info */ + emit_rel_store(gen, attr_field(prog_load_attr, func_info), func_info); + + /* populate union bpf_attr with a pointer to line_info */ + emit_rel_store(gen, attr_field(prog_load_attr, line_info), line_info); + + /* populate union bpf_attr fd_array with a pointer to stack where map_fds are saved */ + emit_rel_store_sp(gen, attr_field(prog_load_attr, fd_array), + stack_off(map_fd[0])); + + /* populate union bpf_attr with user provided log details */ + move_ctx2blob(gen, attr_field(prog_load_attr, log_level), 4, + offsetof(struct bpf_loader_ctx, log_level), false); + move_ctx2blob(gen, attr_field(prog_load_attr, log_size), 4, + offsetof(struct bpf_loader_ctx, log_size), false); + move_ctx2blob(gen, attr_field(prog_load_attr, log_buf), 8, + offsetof(struct bpf_loader_ctx, log_buf), false); + /* populate union bpf_attr with btf_fd saved in the stack earlier */ + move_stack2blob(gen, attr_field(prog_load_attr, prog_btf_fd), 4, + stack_off(btf_fd)); + if (gen->attach_kind) { + emit_find_attach_target(gen); + /* populate union bpf_attr with btf_id and btf_obj_fd found by helper */ + emit2(gen, BPF_LD_IMM64_RAW_FULL(BPF_REG_0, BPF_PSEUDO_MAP_IDX_VALUE, + 0, 0, 0, prog_load_attr)); + emit(gen, BPF_STX_MEM(BPF_W, BPF_REG_0, BPF_REG_7, + offsetof(union bpf_attr, attach_btf_id))); + emit(gen, BPF_ALU64_IMM(BPF_RSH, BPF_REG_7, 32)); + emit(gen, BPF_STX_MEM(BPF_W, BPF_REG_0, BPF_REG_7, + offsetof(union bpf_attr, attach_btf_obj_fd))); + } + emit_relos(gen, insns); + /* emit PROG_LOAD command */ + emit_sys_bpf(gen, BPF_PROG_LOAD, prog_load_attr, attr_size); + debug_ret(gen, "prog_load %s insn_cnt %d", attr.prog_name, attr.insn_cnt); + /* successful or not, close btf module FDs used in extern ksyms and attach_btf_obj_fd */ + cleanup_relos(gen, insns); + if (gen->attach_kind) + emit_sys_close_blob(gen, + attr_field(prog_load_attr, attach_btf_obj_fd)); + emit_check_err(gen); + /* remember prog_fd in the stack, if successful */ + emit(gen, BPF_STX_MEM(BPF_W, BPF_REG_10, BPF_REG_7, + stack_off(prog_fd[gen->nr_progs]))); + gen->nr_progs++; +} + +void bpf_gen__map_update_elem(struct bpf_gen *gen, int map_idx, void *pvalue, + __u32 value_size) +{ + int attr_size = offsetofend(union bpf_attr, flags); + int map_update_attr, value, key; + union bpf_attr attr; + int zero = 0; + + memset(&attr, 0, attr_size); + pr_debug("gen: map_update_elem: idx %d\n", map_idx); + + value = add_data(gen, pvalue, value_size); + key = add_data(gen, &zero, sizeof(zero)); + + /* if (map_desc[map_idx].initial_value) + * copy_from_user(value, initial_value, value_size); + */ + emit(gen, BPF_LDX_MEM(BPF_DW, BPF_REG_3, BPF_REG_6, + sizeof(struct bpf_loader_ctx) + + sizeof(struct bpf_map_desc) * map_idx + + offsetof(struct bpf_map_desc, initial_value))); + emit(gen, BPF_JMP_IMM(BPF_JEQ, BPF_REG_3, 0, 4)); + emit2(gen, BPF_LD_IMM64_RAW_FULL(BPF_REG_1, BPF_PSEUDO_MAP_IDX_VALUE, + 0, 0, 0, value)); + emit(gen, BPF_MOV64_IMM(BPF_REG_2, value_size)); + emit(gen, BPF_EMIT_CALL(BPF_FUNC_copy_from_user)); + + map_update_attr = add_data(gen, &attr, attr_size); + move_stack2blob(gen, attr_field(map_update_attr, map_fd), 4, + stack_off(map_fd[map_idx])); + emit_rel_store(gen, attr_field(map_update_attr, key), key); + emit_rel_store(gen, attr_field(map_update_attr, value), value); + /* emit MAP_UPDATE_ELEM command */ + emit_sys_bpf(gen, BPF_MAP_UPDATE_ELEM, map_update_attr, attr_size); + debug_ret(gen, "update_elem idx %d value_size %d", map_idx, value_size); + emit_check_err(gen); +} + +void bpf_gen__map_freeze(struct bpf_gen *gen, int map_idx) +{ + int attr_size = offsetofend(union bpf_attr, map_fd); + int map_freeze_attr; + union bpf_attr attr; + + memset(&attr, 0, attr_size); + pr_debug("gen: map_freeze: idx %d\n", map_idx); + map_freeze_attr = add_data(gen, &attr, attr_size); + move_stack2blob(gen, attr_field(map_freeze_attr, map_fd), 4, + stack_off(map_fd[map_idx])); + /* emit MAP_FREEZE command */ + emit_sys_bpf(gen, BPF_MAP_FREEZE, map_freeze_attr, attr_size); + debug_ret(gen, "map_freeze"); + emit_check_err(gen); +} diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c index e2a3cf437814..48c0ade05ab1 100644 --- a/tools/lib/bpf/libbpf.c +++ b/tools/lib/bpf/libbpf.c @@ -54,6 +54,7 @@ #include "str_error.h" #include "libbpf_internal.h" #include "hashmap.h" +#include "bpf_gen_internal.h" #ifndef BPF_FS_MAGIC #define BPF_FS_MAGIC 0xcafe4a11 @@ -150,6 +151,23 @@ static inline __u64 ptr_to_u64(const void *ptr) return (__u64) (unsigned long) ptr; } +/* this goes away in libbpf 1.0 */ +enum libbpf_strict_mode libbpf_mode = LIBBPF_STRICT_NONE; + +int libbpf_set_strict_mode(enum libbpf_strict_mode mode) +{ + /* __LIBBPF_STRICT_LAST is the last power-of-2 value used + 1, so to + * get all possible values we compensate last +1, and then (2*x - 1) + * to get the bit mask + */ + if (mode != LIBBPF_STRICT_ALL + && (mode & ~((__LIBBPF_STRICT_LAST - 1) * 2 - 1))) + return errno = EINVAL, -EINVAL; + + libbpf_mode = mode; + return 0; +} + enum kern_feature_id { /* v4.14: kernel support for program & map names. */ FEAT_PROG_NAME, @@ -178,7 +196,7 @@ enum kern_feature_id { __FEAT_CNT, }; -static bool kernel_supports(enum kern_feature_id feat_id); +static bool kernel_supports(const struct bpf_object *obj, enum kern_feature_id feat_id); enum reloc_type { RELO_LD64, @@ -432,6 +450,8 @@ struct bpf_object { bool loaded; bool has_subcalls; + struct bpf_gen *gen_loader; + /* * Information when doing elf related work. Only valid if fd * is valid. @@ -677,6 +697,11 @@ bpf_object__add_programs(struct bpf_object *obj, Elf_Data *sec_data, return -LIBBPF_ERRNO__FORMAT; } + if (sec_idx != obj->efile.text_shndx && GELF_ST_BIND(sym.st_info) == STB_LOCAL) { + pr_warn("sec '%s': program '%s' is static and not supported\n", sec_name, name); + return -ENOTSUP; + } + pr_debug("sec '%s': found program '%s' at insn offset %zu (%zu bytes), code size %zu insns (%zu bytes)\n", sec_name, name, sec_off / BPF_INSN_SZ, sec_off, prog_sz / BPF_INSN_SZ, prog_sz); @@ -700,13 +725,14 @@ bpf_object__add_programs(struct bpf_object *obj, Elf_Data *sec_data, if (err) return err; - /* if function is a global/weak symbol, but has hidden - * visibility (STV_HIDDEN), mark its BTF FUNC as static to - * enable more permissive BPF verification mode with more - * outside context available to BPF verifier + /* if function is a global/weak symbol, but has restricted + * (STV_HIDDEN or STV_INTERNAL) visibility, mark its BTF FUNC + * as static to enable more permissive BPF verification mode + * with more outside context available to BPF verifier */ if (GELF_ST_BIND(sym.st_info) != STB_LOCAL - && GELF_ST_VISIBILITY(sym.st_other) == STV_HIDDEN) + && (GELF_ST_VISIBILITY(sym.st_other) == STV_HIDDEN + || GELF_ST_VISIBILITY(sym.st_other) == STV_INTERNAL)) prog->mark_btf_static = true; nr_progs++; @@ -1794,7 +1820,6 @@ static int bpf_object__init_user_maps(struct bpf_object *obj, bool strict) if (!symbols) return -EINVAL; - scn = elf_sec_by_idx(obj, obj->efile.maps_shndx); data = elf_sec_data(obj, scn); if (!scn || !data) { @@ -1854,6 +1879,12 @@ static int bpf_object__init_user_maps(struct bpf_object *obj, bool strict) return -LIBBPF_ERRNO__FORMAT; } + if (GELF_ST_TYPE(sym.st_info) == STT_SECTION + || GELF_ST_BIND(sym.st_info) == STB_LOCAL) { + pr_warn("map '%s' (legacy): static maps are not supported\n", map_name); + return -ENOTSUP; + } + map->libbpf_type = LIBBPF_MAP_UNSPEC; map->sec_idx = sym.st_shndx; map->sec_offset = sym.st_value; @@ -2261,6 +2292,16 @@ static void fill_map_from_def(struct bpf_map *map, const struct btf_map_def *def pr_debug("map '%s': found inner map definition.\n", map->name); } +static const char *btf_var_linkage_str(__u32 linkage) +{ + switch (linkage) { + case BTF_VAR_STATIC: return "static"; + case BTF_VAR_GLOBAL_ALLOCATED: return "global"; + case BTF_VAR_GLOBAL_EXTERN: return "extern"; + default: return "unknown"; + } +} + static int bpf_object__init_user_btf_map(struct bpf_object *obj, const struct btf_type *sec, int var_idx, int sec_idx, @@ -2293,10 +2334,9 @@ static int bpf_object__init_user_btf_map(struct bpf_object *obj, map_name, btf_kind_str(var)); return -EINVAL; } - if (var_extra->linkage != BTF_VAR_GLOBAL_ALLOCATED && - var_extra->linkage != BTF_VAR_STATIC) { - pr_warn("map '%s': unsupported var linkage %u.\n", - map_name, var_extra->linkage); + if (var_extra->linkage != BTF_VAR_GLOBAL_ALLOCATED) { + pr_warn("map '%s': unsupported map linkage %s.\n", + map_name, btf_var_linkage_str(var_extra->linkage)); return -EOPNOTSUPP; } @@ -2425,10 +2465,8 @@ static int bpf_object__init_maps(struct bpf_object *obj, err = err ?: bpf_object__init_global_data_maps(obj); err = err ?: bpf_object__init_kconfig_map(obj); err = err ?: bpf_object__init_struct_ops_maps(obj); - if (err) - return err; - return 0; + return err; } static bool section_have_execinstr(struct bpf_object *obj, int idx) @@ -2443,20 +2481,20 @@ static bool section_have_execinstr(struct bpf_object *obj, int idx) static bool btf_needs_sanitization(struct bpf_object *obj) { - bool has_func_global = kernel_supports(FEAT_BTF_GLOBAL_FUNC); - bool has_datasec = kernel_supports(FEAT_BTF_DATASEC); - bool has_float = kernel_supports(FEAT_BTF_FLOAT); - bool has_func = kernel_supports(FEAT_BTF_FUNC); + bool has_func_global = kernel_supports(obj, FEAT_BTF_GLOBAL_FUNC); + bool has_datasec = kernel_supports(obj, FEAT_BTF_DATASEC); + bool has_float = kernel_supports(obj, FEAT_BTF_FLOAT); + bool has_func = kernel_supports(obj, FEAT_BTF_FUNC); return !has_func || !has_datasec || !has_func_global || !has_float; } static void bpf_object__sanitize_btf(struct bpf_object *obj, struct btf *btf) { - bool has_func_global = kernel_supports(FEAT_BTF_GLOBAL_FUNC); - bool has_datasec = kernel_supports(FEAT_BTF_DATASEC); - bool has_float = kernel_supports(FEAT_BTF_FLOAT); - bool has_func = kernel_supports(FEAT_BTF_FUNC); + bool has_func_global = kernel_supports(obj, FEAT_BTF_GLOBAL_FUNC); + bool has_datasec = kernel_supports(obj, FEAT_BTF_DATASEC); + bool has_float = kernel_supports(obj, FEAT_BTF_FLOAT); + bool has_func = kernel_supports(obj, FEAT_BTF_FUNC); struct btf_type *t; int i, j, vlen; @@ -2539,16 +2577,14 @@ static int bpf_object__init_btf(struct bpf_object *obj, if (btf_data) { obj->btf = btf__new(btf_data->d_buf, btf_data->d_size); - if (IS_ERR(obj->btf)) { - err = PTR_ERR(obj->btf); + err = libbpf_get_error(obj->btf); + if (err) { obj->btf = NULL; - pr_warn("Error loading ELF section %s: %d.\n", - BTF_ELF_SEC, err); + pr_warn("Error loading ELF section %s: %d.\n", BTF_ELF_SEC, err); goto out; } /* enforce 8-byte pointers for BPF-targeted BTFs */ btf__set_pointer_size(obj->btf, 8); - err = 0; } if (btf_ext_data) { if (!obj->btf) { @@ -2556,11 +2592,11 @@ static int bpf_object__init_btf(struct bpf_object *obj, BTF_EXT_ELF_SEC, BTF_ELF_SEC); goto out; } - obj->btf_ext = btf_ext__new(btf_ext_data->d_buf, - btf_ext_data->d_size); - if (IS_ERR(obj->btf_ext)) { - pr_warn("Error loading ELF section %s: %ld. Ignored and continue.\n", - BTF_EXT_ELF_SEC, PTR_ERR(obj->btf_ext)); + obj->btf_ext = btf_ext__new(btf_ext_data->d_buf, btf_ext_data->d_size); + err = libbpf_get_error(obj->btf_ext); + if (err) { + pr_warn("Error loading ELF section %s: %d. Ignored and continue.\n", + BTF_EXT_ELF_SEC, err); obj->btf_ext = NULL; goto out; } @@ -2637,15 +2673,15 @@ static int bpf_object__load_vmlinux_btf(struct bpf_object *obj, bool force) int err; /* btf_vmlinux could be loaded earlier */ - if (obj->btf_vmlinux) + if (obj->btf_vmlinux || obj->gen_loader) return 0; if (!force && !obj_needs_vmlinux_btf(obj)) return 0; obj->btf_vmlinux = libbpf_find_kernel_btf(); - if (IS_ERR(obj->btf_vmlinux)) { - err = PTR_ERR(obj->btf_vmlinux); + err = libbpf_get_error(obj->btf_vmlinux); + if (err) { pr_warn("Error loading vmlinux BTF: %d\n", err); obj->btf_vmlinux = NULL; return err; @@ -2662,7 +2698,7 @@ static int bpf_object__sanitize_and_load_btf(struct bpf_object *obj) if (!obj->btf) return 0; - if (!kernel_supports(FEAT_BTF)) { + if (!kernel_supports(obj, FEAT_BTF)) { if (kernel_needs_btf(obj)) { err = -EOPNOTSUPP; goto report; @@ -2711,15 +2747,29 @@ static int bpf_object__sanitize_and_load_btf(struct bpf_object *obj) /* clone BTF to sanitize a copy and leave the original intact */ raw_data = btf__get_raw_data(obj->btf, &sz); kern_btf = btf__new(raw_data, sz); - if (IS_ERR(kern_btf)) - return PTR_ERR(kern_btf); + err = libbpf_get_error(kern_btf); + if (err) + return err; /* enforce 8-byte pointers for BPF-targeted BTFs */ btf__set_pointer_size(obj->btf, 8); bpf_object__sanitize_btf(obj, kern_btf); } - err = btf__load(kern_btf); + if (obj->gen_loader) { + __u32 raw_size = 0; + const void *raw_data = btf__get_raw_data(kern_btf, &raw_size); + + if (!raw_data) + return -ENOMEM; + bpf_gen__load_btf(obj->gen_loader, raw_data, raw_size); + /* Pretend to have valid FD to pass various fd >= 0 checks. + * This fd == 0 will not be used with any syscall and will be reset to -1 eventually. + */ + btf__set_fd(kern_btf, 0); + } else { + err = btf__load(kern_btf); + } if (sanitize) { if (!err) { /* move fd to libbpf's BTF */ @@ -3216,6 +3266,9 @@ static int add_dummy_ksym_var(struct btf *btf) const struct btf_var_secinfo *vs; const struct btf_type *sec; + if (!btf) + return 0; + sec_btf_id = btf__find_by_name_kind(btf, KSYMS_SEC, BTF_KIND_DATASEC); if (sec_btf_id < 0) @@ -3470,7 +3523,7 @@ bpf_object__find_program_by_title(const struct bpf_object *obj, if (pos->sec_name && !strcmp(pos->sec_name, title)) return pos; } - return NULL; + return errno = ENOENT, NULL; } static bool prog_is_subprog(const struct bpf_object *obj, @@ -3503,7 +3556,7 @@ bpf_object__find_program_by_name(const struct bpf_object *obj, if (!strcmp(prog->name, name)) return prog; } - return NULL; + return errno = ENOENT, NULL; } static bool bpf_object__shndx_is_data(const struct bpf_object *obj, @@ -3850,11 +3903,11 @@ int bpf_map__reuse_fd(struct bpf_map *map, int fd) err = bpf_obj_get_info_by_fd(fd, &info, &len); if (err) - return err; + return libbpf_err(err); new_name = strdup(info.name); if (!new_name) - return -errno; + return libbpf_err(-errno); new_fd = open("/", O_RDONLY | O_CLOEXEC); if (new_fd < 0) { @@ -3892,7 +3945,7 @@ err_close_new_fd: close(new_fd); err_free_new_name: free(new_name); - return err; + return libbpf_err(err); } __u32 bpf_map__max_entries(const struct bpf_map *map) @@ -3903,7 +3956,7 @@ __u32 bpf_map__max_entries(const struct bpf_map *map) struct bpf_map *bpf_map__inner_map(struct bpf_map *map) { if (!bpf_map_type__is_map_in_map(map->def.type)) - return NULL; + return errno = EINVAL, NULL; return map->inner_map; } @@ -3911,7 +3964,7 @@ struct bpf_map *bpf_map__inner_map(struct bpf_map *map) int bpf_map__set_max_entries(struct bpf_map *map, __u32 max_entries) { if (map->fd >= 0) - return -EBUSY; + return libbpf_err(-EBUSY); map->def.max_entries = max_entries; return 0; } @@ -3919,7 +3972,7 @@ int bpf_map__set_max_entries(struct bpf_map *map, __u32 max_entries) int bpf_map__resize(struct bpf_map *map, __u32 max_entries) { if (!map || !max_entries) - return -EINVAL; + return libbpf_err(-EINVAL); return bpf_map__set_max_entries(map, max_entries); } @@ -3935,6 +3988,9 @@ bpf_object__probe_loading(struct bpf_object *obj) }; int ret; + if (obj->gen_loader) + return 0; + /* make sure basic loading works */ memset(&attr, 0, sizeof(attr)); @@ -4290,11 +4346,17 @@ static struct kern_feature_desc { }, }; -static bool kernel_supports(enum kern_feature_id feat_id) +static bool kernel_supports(const struct bpf_object *obj, enum kern_feature_id feat_id) { struct kern_feature_desc *feat = &feature_probes[feat_id]; int ret; + if (obj->gen_loader) + /* To generate loader program assume the latest kernel + * to avoid doing extra prog_load, map_create syscalls. + */ + return true; + if (READ_ONCE(feat->res) == FEAT_UNKNOWN) { ret = feat->probe(); if (ret > 0) { @@ -4377,6 +4439,13 @@ bpf_object__populate_internal_map(struct bpf_object *obj, struct bpf_map *map) char *cp, errmsg[STRERR_BUFSIZE]; int err, zero = 0; + if (obj->gen_loader) { + bpf_gen__map_update_elem(obj->gen_loader, map - obj->maps, + map->mmaped, map->def.value_size); + if (map_type == LIBBPF_MAP_RODATA || map_type == LIBBPF_MAP_KCONFIG) + bpf_gen__map_freeze(obj->gen_loader, map - obj->maps); + return 0; + } err = bpf_map_update_elem(map->fd, &zero, map->mmaped, 0); if (err) { err = -errno; @@ -4402,14 +4471,14 @@ bpf_object__populate_internal_map(struct bpf_object *obj, struct bpf_map *map) static void bpf_map__destroy(struct bpf_map *map); -static int bpf_object__create_map(struct bpf_object *obj, struct bpf_map *map) +static int bpf_object__create_map(struct bpf_object *obj, struct bpf_map *map, bool is_inner) { struct bpf_create_map_attr create_attr; struct bpf_map_def *def = &map->def; memset(&create_attr, 0, sizeof(create_attr)); - if (kernel_supports(FEAT_PROG_NAME)) + if (kernel_supports(obj, FEAT_PROG_NAME)) create_attr.name = map->name; create_attr.map_ifindex = map->map_ifindex; create_attr.map_type = def->type; @@ -4450,7 +4519,7 @@ static int bpf_object__create_map(struct bpf_object *obj, struct bpf_map *map) if (map->inner_map) { int err; - err = bpf_object__create_map(obj, map->inner_map); + err = bpf_object__create_map(obj, map->inner_map, true); if (err) { pr_warn("map '%s': failed to create inner map: %d\n", map->name, err); @@ -4462,7 +4531,15 @@ static int bpf_object__create_map(struct bpf_object *obj, struct bpf_map *map) create_attr.inner_map_fd = map->inner_map_fd; } - map->fd = bpf_create_map_xattr(&create_attr); + if (obj->gen_loader) { + bpf_gen__map_create(obj->gen_loader, &create_attr, is_inner ? -1 : map - obj->maps); + /* Pretend to have valid FD to pass various fd >= 0 checks. + * This fd == 0 will not be used with any syscall and will be reset to -1 eventually. + */ + map->fd = 0; + } else { + map->fd = bpf_create_map_xattr(&create_attr); + } if (map->fd < 0 && (create_attr.btf_key_type_id || create_attr.btf_value_type_id)) { char *cp, errmsg[STRERR_BUFSIZE]; @@ -4483,6 +4560,8 @@ static int bpf_object__create_map(struct bpf_object *obj, struct bpf_map *map) return -errno; if (bpf_map_type__is_map_in_map(def->type) && map->inner_map) { + if (obj->gen_loader) + map->inner_map->fd = -1; bpf_map__destroy(map->inner_map); zfree(&map->inner_map); } @@ -4490,11 +4569,11 @@ static int bpf_object__create_map(struct bpf_object *obj, struct bpf_map *map) return 0; } -static int init_map_slots(struct bpf_map *map) +static int init_map_slots(struct bpf_object *obj, struct bpf_map *map) { const struct bpf_map *targ_map; unsigned int i; - int fd, err; + int fd, err = 0; for (i = 0; i < map->init_slots_sz; i++) { if (!map->init_slots[i]) @@ -4502,7 +4581,13 @@ static int init_map_slots(struct bpf_map *map) targ_map = map->init_slots[i]; fd = bpf_map__fd(targ_map); - err = bpf_map_update_elem(map->fd, &i, &fd, 0); + if (obj->gen_loader) { + pr_warn("// TODO map_update_elem: idx %td key %d value==map_idx %td\n", + map - obj->maps, i, targ_map - obj->maps); + return -ENOTSUP; + } else { + err = bpf_map_update_elem(map->fd, &i, &fd, 0); + } if (err) { err = -errno; pr_warn("map '%s': failed to initialize slot [%d] to map '%s' fd=%d: %d\n", @@ -4544,7 +4629,7 @@ bpf_object__create_maps(struct bpf_object *obj) pr_debug("map '%s': skipping creation (preset fd=%d)\n", map->name, map->fd); } else { - err = bpf_object__create_map(obj, map); + err = bpf_object__create_map(obj, map, false); if (err) goto err_out; @@ -4560,7 +4645,7 @@ bpf_object__create_maps(struct bpf_object *obj) } if (map->init_slots_sz) { - err = init_map_slots(map); + err = init_map_slots(obj, map); if (err < 0) { zclose(map->fd); goto err_out; @@ -4970,11 +5055,14 @@ static int load_module_btfs(struct bpf_object *obj) if (obj->btf_modules_loaded) return 0; + if (obj->gen_loader) + return 0; + /* don't do this again, even if we find no module BTFs */ obj->btf_modules_loaded = true; /* kernel too old to support module BTFs */ - if (!kernel_supports(FEAT_MODULE_BTF)) + if (!kernel_supports(obj, FEAT_MODULE_BTF)) return 0; while (true) { @@ -5015,10 +5103,10 @@ static int load_module_btfs(struct bpf_object *obj) } btf = btf_get_from_fd(fd, obj->btf_vmlinux); - if (IS_ERR(btf)) { - pr_warn("failed to load module [%s]'s BTF object #%d: %ld\n", - name, id, PTR_ERR(btf)); - err = PTR_ERR(btf); + err = libbpf_get_error(btf); + if (err) { + pr_warn("failed to load module [%s]'s BTF object #%d: %d\n", + name, id, err); goto err_out; } @@ -6117,6 +6205,12 @@ static int bpf_core_apply_relo(struct bpf_program *prog, if (str_is_empty(spec_str)) return -EINVAL; + if (prog->obj->gen_loader) { + pr_warn("// TODO core_relo: prog %td insn[%d] %s %s kind %d\n", + prog - prog->obj->programs, relo->insn_off / 8, + local_name, spec_str, relo->kind); + return -ENOTSUP; + } err = bpf_core_parse_spec(local_btf, local_id, spec_str, relo->kind, &local_spec); if (err) { pr_warn("prog '%s': relo #%d: parsing [%d] %s %s + %s failed: %d\n", @@ -6272,8 +6366,8 @@ bpf_object__relocate_core(struct bpf_object *obj, const char *targ_btf_path) if (targ_btf_path) { obj->btf_vmlinux_override = btf__parse(targ_btf_path, NULL); - if (IS_ERR_OR_NULL(obj->btf_vmlinux_override)) { - err = PTR_ERR(obj->btf_vmlinux_override); + err = libbpf_get_error(obj->btf_vmlinux_override); + if (err) { pr_warn("failed to parse target BTF: %d\n", err); return err; } @@ -6368,19 +6462,34 @@ bpf_object__relocate_data(struct bpf_object *obj, struct bpf_program *prog) switch (relo->type) { case RELO_LD64: - insn[0].src_reg = BPF_PSEUDO_MAP_FD; - insn[0].imm = obj->maps[relo->map_idx].fd; + if (obj->gen_loader) { + insn[0].src_reg = BPF_PSEUDO_MAP_IDX; + insn[0].imm = relo->map_idx; + } else { + insn[0].src_reg = BPF_PSEUDO_MAP_FD; + insn[0].imm = obj->maps[relo->map_idx].fd; + } break; case RELO_DATA: - insn[0].src_reg = BPF_PSEUDO_MAP_VALUE; insn[1].imm = insn[0].imm + relo->sym_off; - insn[0].imm = obj->maps[relo->map_idx].fd; + if (obj->gen_loader) { + insn[0].src_reg = BPF_PSEUDO_MAP_IDX_VALUE; + insn[0].imm = relo->map_idx; + } else { + insn[0].src_reg = BPF_PSEUDO_MAP_VALUE; + insn[0].imm = obj->maps[relo->map_idx].fd; + } break; case RELO_EXTERN_VAR: ext = &obj->externs[relo->sym_off]; if (ext->type == EXT_KCFG) { - insn[0].src_reg = BPF_PSEUDO_MAP_VALUE; - insn[0].imm = obj->maps[obj->kconfig_map_idx].fd; + if (obj->gen_loader) { + insn[0].src_reg = BPF_PSEUDO_MAP_IDX_VALUE; + insn[0].imm = obj->kconfig_map_idx; + } else { + insn[0].src_reg = BPF_PSEUDO_MAP_VALUE; + insn[0].imm = obj->maps[obj->kconfig_map_idx].fd; + } insn[1].imm = ext->kcfg.data_off; } else /* EXT_KSYM */ { if (ext->ksym.type_id) { /* typed ksyms */ @@ -6399,11 +6508,15 @@ bpf_object__relocate_data(struct bpf_object *obj, struct bpf_program *prog) insn[0].imm = ext->ksym.kernel_btf_id; break; case RELO_SUBPROG_ADDR: - insn[0].src_reg = BPF_PSEUDO_FUNC; - /* will be handled as a follow up pass */ + if (insn[0].src_reg != BPF_PSEUDO_FUNC) { + pr_warn("prog '%s': relo #%d: bad insn\n", + prog->name, i); + return -EINVAL; + } + /* handled already */ break; case RELO_CALL: - /* will be handled as a follow up pass */ + /* handled already */ break; default: pr_warn("prog '%s': relo #%d: bad relo type %d\n", @@ -6494,7 +6607,7 @@ reloc_prog_func_and_line_info(const struct bpf_object *obj, /* no .BTF.ext relocation if .BTF.ext is missing or kernel doesn't * supprot func/line info */ - if (!obj->btf_ext || !kernel_supports(FEAT_BTF_FUNC)) + if (!obj->btf_ext || !kernel_supports(obj, FEAT_BTF_FUNC)) return 0; /* only attempt func info relocation if main program's func_info @@ -6572,6 +6685,30 @@ static struct reloc_desc *find_prog_insn_relo(const struct bpf_program *prog, si sizeof(*prog->reloc_desc), cmp_relo_by_insn_idx); } +static int append_subprog_relos(struct bpf_program *main_prog, struct bpf_program *subprog) +{ + int new_cnt = main_prog->nr_reloc + subprog->nr_reloc; + struct reloc_desc *relos; + int i; + + if (main_prog == subprog) + return 0; + relos = libbpf_reallocarray(main_prog->reloc_desc, new_cnt, sizeof(*relos)); + if (!relos) + return -ENOMEM; + memcpy(relos + main_prog->nr_reloc, subprog->reloc_desc, + sizeof(*relos) * subprog->nr_reloc); + + for (i = main_prog->nr_reloc; i < new_cnt; i++) + relos[i].insn_idx += subprog->sub_insn_off; + /* After insn_idx adjustment the 'relos' array is still sorted + * by insn_idx and doesn't break bsearch. + */ + main_prog->reloc_desc = relos; + main_prog->nr_reloc = new_cnt; + return 0; +} + static int bpf_object__reloc_code(struct bpf_object *obj, struct bpf_program *main_prog, struct bpf_program *prog) @@ -6592,6 +6729,11 @@ bpf_object__reloc_code(struct bpf_object *obj, struct bpf_program *main_prog, continue; relo = find_prog_insn_relo(prog, insn_idx); + if (relo && relo->type == RELO_EXTERN_FUNC) + /* kfunc relocations will be handled later + * in bpf_object__relocate_data() + */ + continue; if (relo && relo->type != RELO_CALL && relo->type != RELO_SUBPROG_ADDR) { pr_warn("prog '%s': unexpected relo for insn #%zu, type %d\n", prog->name, insn_idx, relo->type); @@ -6666,6 +6808,10 @@ bpf_object__reloc_code(struct bpf_object *obj, struct bpf_program *main_prog, pr_debug("prog '%s': added %zu insns from sub-prog '%s'\n", main_prog->name, subprog->insns_cnt, subprog->name); + /* The subprog insns are now appended. Append its relos too. */ + err = append_subprog_relos(main_prog, subprog); + if (err) + return err; err = bpf_object__reloc_code(obj, main_prog, subprog); if (err) return err; @@ -6795,11 +6941,25 @@ bpf_object__relocate_calls(struct bpf_object *obj, struct bpf_program *prog) return 0; } +static void +bpf_object__free_relocs(struct bpf_object *obj) +{ + struct bpf_program *prog; + int i; + + /* free up relocation descriptors */ + for (i = 0; i < obj->nr_programs; i++) { + prog = &obj->programs[i]; + zfree(&prog->reloc_desc); + prog->nr_reloc = 0; + } +} + static int bpf_object__relocate(struct bpf_object *obj, const char *targ_btf_path) { struct bpf_program *prog; - size_t i; + size_t i, j; int err; if (obj->btf_ext) { @@ -6810,23 +6970,32 @@ bpf_object__relocate(struct bpf_object *obj, const char *targ_btf_path) return err; } } - /* relocate data references first for all programs and sub-programs, - * as they don't change relative to code locations, so subsequent - * subprogram processing won't need to re-calculate any of them + + /* Before relocating calls pre-process relocations and mark + * few ld_imm64 instructions that points to subprogs. + * Otherwise bpf_object__reloc_code() later would have to consider + * all ld_imm64 insns as relocation candidates. That would + * reduce relocation speed, since amount of find_prog_insn_relo() + * would increase and most of them will fail to find a relo. */ for (i = 0; i < obj->nr_programs; i++) { prog = &obj->programs[i]; - err = bpf_object__relocate_data(obj, prog); - if (err) { - pr_warn("prog '%s': failed to relocate data references: %d\n", - prog->name, err); - return err; + for (j = 0; j < prog->nr_reloc; j++) { + struct reloc_desc *relo = &prog->reloc_desc[j]; + struct bpf_insn *insn = &prog->insns[relo->insn_idx]; + + /* mark the insn, so it's recognized by insn_is_pseudo_func() */ + if (relo->type == RELO_SUBPROG_ADDR) + insn[0].src_reg = BPF_PSEUDO_FUNC; } } - /* now relocate subprogram calls and append used subprograms to main + + /* relocate subprogram calls and append used subprograms to main * programs; each copy of subprogram code needs to be relocated * differently for each main program, because its code location might - * have changed + * have changed. + * Append subprog relos to main programs to allow data relos to be + * processed after text is completely relocated. */ for (i = 0; i < obj->nr_programs; i++) { prog = &obj->programs[i]; @@ -6843,12 +7012,20 @@ bpf_object__relocate(struct bpf_object *obj, const char *targ_btf_path) return err; } } - /* free up relocation descriptors */ + /* Process data relos for main programs */ for (i = 0; i < obj->nr_programs; i++) { prog = &obj->programs[i]; - zfree(&prog->reloc_desc); - prog->nr_reloc = 0; + if (prog_is_subprog(obj, prog)) + continue; + err = bpf_object__relocate_data(obj, prog); + if (err) { + pr_warn("prog '%s': failed to relocate data references: %d\n", + prog->name, err); + return err; + } } + if (!obj->gen_loader) + bpf_object__free_relocs(obj); return 0; } @@ -7037,6 +7214,9 @@ static int bpf_object__sanitize_prog(struct bpf_object *obj, struct bpf_program enum bpf_func_id func_id; int i; + if (obj->gen_loader) + return 0; + for (i = 0; i < prog->insns_cnt; i++, insn++) { if (!insn_is_helper_call(insn, &func_id)) continue; @@ -7048,12 +7228,12 @@ static int bpf_object__sanitize_prog(struct bpf_object *obj, struct bpf_program switch (func_id) { case BPF_FUNC_probe_read_kernel: case BPF_FUNC_probe_read_user: - if (!kernel_supports(FEAT_PROBE_READ_KERN)) + if (!kernel_supports(obj, FEAT_PROBE_READ_KERN)) insn->imm = BPF_FUNC_probe_read; break; case BPF_FUNC_probe_read_kernel_str: case BPF_FUNC_probe_read_user_str: - if (!kernel_supports(FEAT_PROBE_READ_KERN)) + if (!kernel_supports(obj, FEAT_PROBE_READ_KERN)) insn->imm = BPF_FUNC_probe_read_str; break; default: @@ -7088,12 +7268,12 @@ load_program(struct bpf_program *prog, struct bpf_insn *insns, int insns_cnt, load_attr.prog_type = prog->type; /* old kernels might not support specifying expected_attach_type */ - if (!kernel_supports(FEAT_EXP_ATTACH_TYPE) && prog->sec_def && + if (!kernel_supports(prog->obj, FEAT_EXP_ATTACH_TYPE) && prog->sec_def && prog->sec_def->is_exp_attach_type_optional) load_attr.expected_attach_type = 0; else load_attr.expected_attach_type = prog->expected_attach_type; - if (kernel_supports(FEAT_PROG_NAME)) + if (kernel_supports(prog->obj, FEAT_PROG_NAME)) load_attr.name = prog->name; load_attr.insns = insns; load_attr.insn_cnt = insns_cnt; @@ -7109,7 +7289,7 @@ load_program(struct bpf_program *prog, struct bpf_insn *insns, int insns_cnt, /* specify func_info/line_info only if kernel supports them */ btf_fd = bpf_object__btf_fd(prog->obj); - if (btf_fd >= 0 && kernel_supports(FEAT_BTF_FUNC)) { + if (btf_fd >= 0 && kernel_supports(prog->obj, FEAT_BTF_FUNC)) { load_attr.prog_btf_fd = btf_fd; load_attr.func_info = prog->func_info; load_attr.func_info_rec_size = prog->func_info_rec_size; @@ -7121,6 +7301,12 @@ load_program(struct bpf_program *prog, struct bpf_insn *insns, int insns_cnt, load_attr.log_level = prog->log_level; load_attr.prog_flags = prog->prog_flags; + if (prog->obj->gen_loader) { + bpf_gen__prog_load(prog->obj->gen_loader, &load_attr, + prog - prog->obj->programs); + *pfd = -1; + return 0; + } retry_load: if (log_buf_size) { log_buf = malloc(log_buf_size); @@ -7139,7 +7325,7 @@ retry_load: pr_debug("verifier log:\n%s", log_buf); if (prog->obj->rodata_map_idx >= 0 && - kernel_supports(FEAT_PROG_BIND_MAP)) { + kernel_supports(prog->obj, FEAT_PROG_BIND_MAP)) { struct bpf_map *rodata_map = &prog->obj->maps[prog->obj->rodata_map_idx]; @@ -7198,6 +7384,38 @@ out: return ret; } +static int bpf_program__record_externs(struct bpf_program *prog) +{ + struct bpf_object *obj = prog->obj; + int i; + + for (i = 0; i < prog->nr_reloc; i++) { + struct reloc_desc *relo = &prog->reloc_desc[i]; + struct extern_desc *ext = &obj->externs[relo->sym_off]; + + switch (relo->type) { + case RELO_EXTERN_VAR: + if (ext->type != EXT_KSYM) + continue; + if (!ext->ksym.type_id) { + pr_warn("typeless ksym %s is not supported yet\n", + ext->name); + return -ENOTSUP; + } + bpf_gen__record_extern(obj->gen_loader, ext->name, BTF_KIND_VAR, + relo->insn_idx); + break; + case RELO_EXTERN_FUNC: + bpf_gen__record_extern(obj->gen_loader, ext->name, BTF_KIND_FUNC, + relo->insn_idx); + break; + default: + continue; + } + } + return 0; +} + static int libbpf_find_attach_btf_id(struct bpf_program *prog, int *btf_obj_fd, int *btf_type_id); int bpf_program__load(struct bpf_program *prog, char *license, __u32 kern_ver) @@ -7206,7 +7424,7 @@ int bpf_program__load(struct bpf_program *prog, char *license, __u32 kern_ver) if (prog->obj->loaded) { pr_warn("prog '%s': can't load after object was loaded\n", prog->name); - return -EINVAL; + return libbpf_err(-EINVAL); } if ((prog->type == BPF_PROG_TYPE_TRACING || @@ -7216,7 +7434,7 @@ int bpf_program__load(struct bpf_program *prog, char *license, __u32 kern_ver) err = libbpf_find_attach_btf_id(prog, &btf_obj_fd, &btf_type_id); if (err) - return err; + return libbpf_err(err); prog->attach_btf_obj_fd = btf_obj_fd; prog->attach_btf_id = btf_type_id; @@ -7226,13 +7444,13 @@ int bpf_program__load(struct bpf_program *prog, char *license, __u32 kern_ver) if (prog->preprocessor) { pr_warn("Internal error: can't load program '%s'\n", prog->name); - return -LIBBPF_ERRNO__INTERNAL; + return libbpf_err(-LIBBPF_ERRNO__INTERNAL); } prog->instances.fds = malloc(sizeof(int)); if (!prog->instances.fds) { pr_warn("Not enough memory for BPF fds\n"); - return -ENOMEM; + return libbpf_err(-ENOMEM); } prog->instances.nr = 1; prog->instances.fds[0] = -1; @@ -7243,6 +7461,8 @@ int bpf_program__load(struct bpf_program *prog, char *license, __u32 kern_ver) pr_warn("prog '%s': inconsistent nr(%d) != 1\n", prog->name, prog->instances.nr); } + if (prog->obj->gen_loader) + bpf_program__record_externs(prog); err = load_program(prog, prog->insns, prog->insns_cnt, license, kern_ver, &fd); if (!err) @@ -7289,7 +7509,7 @@ out: pr_warn("failed to load program '%s'\n", prog->name); zfree(&prog->insns); prog->insns_cnt = 0; - return err; + return libbpf_err(err); } static int @@ -7319,6 +7539,8 @@ bpf_object__load_progs(struct bpf_object *obj, int log_level) if (err) return err; } + if (obj->gen_loader) + bpf_object__free_relocs(obj); return 0; } @@ -7420,7 +7642,7 @@ __bpf_object__open_xattr(struct bpf_object_open_attr *attr, int flags) struct bpf_object *bpf_object__open_xattr(struct bpf_object_open_attr *attr) { - return __bpf_object__open_xattr(attr, 0); + return libbpf_ptr(__bpf_object__open_xattr(attr, 0)); } struct bpf_object *bpf_object__open(const char *path) @@ -7430,18 +7652,18 @@ struct bpf_object *bpf_object__open(const char *path) .prog_type = BPF_PROG_TYPE_UNSPEC, }; - return bpf_object__open_xattr(&attr); + return libbpf_ptr(__bpf_object__open_xattr(&attr, 0)); } struct bpf_object * bpf_object__open_file(const char *path, const struct bpf_object_open_opts *opts) { if (!path) - return ERR_PTR(-EINVAL); + return libbpf_err_ptr(-EINVAL); pr_debug("loading %s\n", path); - return __bpf_object__open(path, NULL, 0, opts); + return libbpf_ptr(__bpf_object__open(path, NULL, 0, opts)); } struct bpf_object * @@ -7449,9 +7671,9 @@ bpf_object__open_mem(const void *obj_buf, size_t obj_buf_sz, const struct bpf_object_open_opts *opts) { if (!obj_buf || obj_buf_sz == 0) - return ERR_PTR(-EINVAL); + return libbpf_err_ptr(-EINVAL); - return __bpf_object__open(NULL, obj_buf, obj_buf_sz, opts); + return libbpf_ptr(__bpf_object__open(NULL, obj_buf, obj_buf_sz, opts)); } struct bpf_object * @@ -7466,9 +7688,9 @@ bpf_object__open_buffer(const void *obj_buf, size_t obj_buf_sz, /* returning NULL is wrong, but backwards-compatible */ if (!obj_buf || obj_buf_sz == 0) - return NULL; + return errno = EINVAL, NULL; - return bpf_object__open_mem(obj_buf, obj_buf_sz, &opts); + return libbpf_ptr(__bpf_object__open(NULL, obj_buf, obj_buf_sz, &opts)); } int bpf_object__unload(struct bpf_object *obj) @@ -7476,7 +7698,7 @@ int bpf_object__unload(struct bpf_object *obj) size_t i; if (!obj) - return -EINVAL; + return libbpf_err(-EINVAL); for (i = 0; i < obj->nr_maps; i++) { zclose(obj->maps[i].fd); @@ -7497,11 +7719,11 @@ static int bpf_object__sanitize_maps(struct bpf_object *obj) bpf_object__for_each_map(m, obj) { if (!bpf_map__is_internal(m)) continue; - if (!kernel_supports(FEAT_GLOBAL_DATA)) { + if (!kernel_supports(obj, FEAT_GLOBAL_DATA)) { pr_warn("kernel doesn't support global data\n"); return -ENOTSUP; } - if (!kernel_supports(FEAT_ARRAY_MMAP)) + if (!kernel_supports(obj, FEAT_ARRAY_MMAP)) m->def.map_flags ^= BPF_F_MMAPABLE; } @@ -7699,6 +7921,12 @@ static int bpf_object__resolve_ksyms_btf_id(struct bpf_object *obj) if (ext->type != EXT_KSYM || !ext->ksym.type_id) continue; + if (obj->gen_loader) { + ext->is_set = true; + ext->ksym.kernel_btf_obj_fd = 0; + ext->ksym.kernel_btf_id = 0; + continue; + } t = btf__type_by_id(obj->btf, ext->btf_id); if (btf_is_var(t)) err = bpf_object__resolve_ksym_var_btf_id(obj, ext); @@ -7803,16 +8031,19 @@ int bpf_object__load_xattr(struct bpf_object_load_attr *attr) int err, i; if (!attr) - return -EINVAL; + return libbpf_err(-EINVAL); obj = attr->obj; if (!obj) - return -EINVAL; + return libbpf_err(-EINVAL); if (obj->loaded) { pr_warn("object '%s': load can't be attempted twice\n", obj->name); - return -EINVAL; + return libbpf_err(-EINVAL); } + if (obj->gen_loader) + bpf_gen__init(obj->gen_loader, attr->log_level); + err = bpf_object__probe_loading(obj); err = err ? : bpf_object__load_vmlinux_btf(obj, false); err = err ? : bpf_object__resolve_externs(obj, obj->kconfig); @@ -7823,6 +8054,15 @@ int bpf_object__load_xattr(struct bpf_object_load_attr *attr) err = err ? : bpf_object__relocate(obj, attr->target_btf_path); err = err ? : bpf_object__load_progs(obj, attr->log_level); + if (obj->gen_loader) { + /* reset FDs */ + btf__set_fd(obj->btf, -1); + for (i = 0; i < obj->nr_maps; i++) + obj->maps[i].fd = -1; + if (!err) + err = bpf_gen__finish(obj->gen_loader); + } + /* clean up module BTFs */ for (i = 0; i < obj->btf_module_cnt; i++) { close(obj->btf_modules[i].fd); @@ -7849,7 +8089,7 @@ out: bpf_object__unload(obj); pr_warn("failed to load object '%s'\n", obj->path); - return err; + return libbpf_err(err); } int bpf_object__load(struct bpf_object *obj) @@ -7921,28 +8161,28 @@ int bpf_program__pin_instance(struct bpf_program *prog, const char *path, err = make_parent_dir(path); if (err) - return err; + return libbpf_err(err); err = check_path(path); if (err) - return err; + return libbpf_err(err); if (prog == NULL) { pr_warn("invalid program pointer\n"); - return -EINVAL; + return libbpf_err(-EINVAL); } if (instance < 0 || instance >= prog->instances.nr) { pr_warn("invalid prog instance %d of prog %s (max %d)\n", instance, prog->name, prog->instances.nr); - return -EINVAL; + return libbpf_err(-EINVAL); } if (bpf_obj_pin(prog->instances.fds[instance], path)) { err = -errno; cp = libbpf_strerror_r(err, errmsg, sizeof(errmsg)); pr_warn("failed to pin program: %s\n", cp); - return err; + return libbpf_err(err); } pr_debug("pinned program '%s'\n", path); @@ -7956,22 +8196,23 @@ int bpf_program__unpin_instance(struct bpf_program *prog, const char *path, err = check_path(path); if (err) - return err; + return libbpf_err(err); if (prog == NULL) { pr_warn("invalid program pointer\n"); - return -EINVAL; + return libbpf_err(-EINVAL); } if (instance < 0 || instance >= prog->instances.nr) { pr_warn("invalid prog instance %d of prog %s (max %d)\n", instance, prog->name, prog->instances.nr); - return -EINVAL; + return libbpf_err(-EINVAL); } err = unlink(path); if (err != 0) - return -errno; + return libbpf_err(-errno); + pr_debug("unpinned program '%s'\n", path); return 0; @@ -7983,20 +8224,20 @@ int bpf_program__pin(struct bpf_program *prog, const char *path) err = make_parent_dir(path); if (err) - return err; + return libbpf_err(err); err = check_path(path); if (err) - return err; + return libbpf_err(err); if (prog == NULL) { pr_warn("invalid program pointer\n"); - return -EINVAL; + return libbpf_err(-EINVAL); } if (prog->instances.nr <= 0) { pr_warn("no instances of prog %s to pin\n", prog->name); - return -EINVAL; + return libbpf_err(-EINVAL); } if (prog->instances.nr == 1) { @@ -8040,7 +8281,7 @@ err_unpin: rmdir(path); - return err; + return libbpf_err(err); } int bpf_program__unpin(struct bpf_program *prog, const char *path) @@ -8049,16 +8290,16 @@ int bpf_program__unpin(struct bpf_program *prog, const char *path) err = check_path(path); if (err) - return err; + return libbpf_err(err); if (prog == NULL) { pr_warn("invalid program pointer\n"); - return -EINVAL; + return libbpf_err(-EINVAL); } if (prog->instances.nr <= 0) { pr_warn("no instances of prog %s to pin\n", prog->name); - return -EINVAL; + return libbpf_err(-EINVAL); } if (prog->instances.nr == 1) { @@ -8072,9 +8313,9 @@ int bpf_program__unpin(struct bpf_program *prog, const char *path) len = snprintf(buf, PATH_MAX, "%s/%d", path, i); if (len < 0) - return -EINVAL; + return libbpf_err(-EINVAL); else if (len >= PATH_MAX) - return -ENAMETOOLONG; + return libbpf_err(-ENAMETOOLONG); err = bpf_program__unpin_instance(prog, buf, i); if (err) @@ -8083,7 +8324,7 @@ int bpf_program__unpin(struct bpf_program *prog, const char *path) err = rmdir(path); if (err) - return -errno; + return libbpf_err(-errno); return 0; } @@ -8095,14 +8336,14 @@ int bpf_map__pin(struct bpf_map *map, const char *path) if (map == NULL) { pr_warn("invalid map pointer\n"); - return -EINVAL; + return libbpf_err(-EINVAL); } if (map->pin_path) { if (path && strcmp(path, map->pin_path)) { pr_warn("map '%s' already has pin path '%s' different from '%s'\n", bpf_map__name(map), map->pin_path, path); - return -EINVAL; + return libbpf_err(-EINVAL); } else if (map->pinned) { pr_debug("map '%s' already pinned at '%s'; not re-pinning\n", bpf_map__name(map), map->pin_path); @@ -8112,10 +8353,10 @@ int bpf_map__pin(struct bpf_map *map, const char *path) if (!path) { pr_warn("missing a path to pin map '%s' at\n", bpf_map__name(map)); - return -EINVAL; + return libbpf_err(-EINVAL); } else if (map->pinned) { pr_warn("map '%s' already pinned\n", bpf_map__name(map)); - return -EEXIST; + return libbpf_err(-EEXIST); } map->pin_path = strdup(path); @@ -8127,11 +8368,11 @@ int bpf_map__pin(struct bpf_map *map, const char *path) err = make_parent_dir(map->pin_path); if (err) - return err; + return libbpf_err(err); err = check_path(map->pin_path); if (err) - return err; + return libbpf_err(err); if (bpf_obj_pin(map->fd, map->pin_path)) { err = -errno; @@ -8146,7 +8387,7 @@ int bpf_map__pin(struct bpf_map *map, const char *path) out_err: cp = libbpf_strerror_r(-err, errmsg, sizeof(errmsg)); pr_warn("failed to pin map: %s\n", cp); - return err; + return libbpf_err(err); } int bpf_map__unpin(struct bpf_map *map, const char *path) @@ -8155,29 +8396,29 @@ int bpf_map__unpin(struct bpf_map *map, const char *path) if (map == NULL) { pr_warn("invalid map pointer\n"); - return -EINVAL; + return libbpf_err(-EINVAL); } if (map->pin_path) { if (path && strcmp(path, map->pin_path)) { pr_warn("map '%s' already has pin path '%s' different from '%s'\n", bpf_map__name(map), map->pin_path, path); - return -EINVAL; + return libbpf_err(-EINVAL); } path = map->pin_path; } else if (!path) { pr_warn("no path to unpin map '%s' from\n", bpf_map__name(map)); - return -EINVAL; + return libbpf_err(-EINVAL); } err = check_path(path); if (err) - return err; + return libbpf_err(err); err = unlink(path); if (err != 0) - return -errno; + return libbpf_err(-errno); map->pinned = false; pr_debug("unpinned map '%s' from '%s'\n", bpf_map__name(map), path); @@ -8192,7 +8433,7 @@ int bpf_map__set_pin_path(struct bpf_map *map, const char *path) if (path) { new = strdup(path); if (!new) - return -errno; + return libbpf_err(-errno); } free(map->pin_path); @@ -8226,11 +8467,11 @@ int bpf_object__pin_maps(struct bpf_object *obj, const char *path) int err; if (!obj) - return -ENOENT; + return libbpf_err(-ENOENT); if (!obj->loaded) { pr_warn("object not yet loaded; load it first\n"); - return -ENOENT; + return libbpf_err(-ENOENT); } bpf_object__for_each_map(map, obj) { @@ -8270,7 +8511,7 @@ err_unpin_maps: bpf_map__unpin(map, NULL); } - return err; + return libbpf_err(err); } int bpf_object__unpin_maps(struct bpf_object *obj, const char *path) @@ -8279,7 +8520,7 @@ int bpf_object__unpin_maps(struct bpf_object *obj, const char *path) int err; if (!obj) - return -ENOENT; + return libbpf_err(-ENOENT); bpf_object__for_each_map(map, obj) { char *pin_path = NULL; @@ -8291,9 +8532,9 @@ int bpf_object__unpin_maps(struct bpf_object *obj, const char *path) len = snprintf(buf, PATH_MAX, "%s/%s", path, bpf_map__name(map)); if (len < 0) - return -EINVAL; + return libbpf_err(-EINVAL); else if (len >= PATH_MAX) - return -ENAMETOOLONG; + return libbpf_err(-ENAMETOOLONG); sanitize_pin_path(buf); pin_path = buf; } else if (!map->pin_path) { @@ -8302,7 +8543,7 @@ int bpf_object__unpin_maps(struct bpf_object *obj, const char *path) err = bpf_map__unpin(map, pin_path); if (err) - return err; + return libbpf_err(err); } return 0; @@ -8314,11 +8555,11 @@ int bpf_object__pin_programs(struct bpf_object *obj, const char *path) int err; if (!obj) - return -ENOENT; + return libbpf_err(-ENOENT); if (!obj->loaded) { pr_warn("object not yet loaded; load it first\n"); - return -ENOENT; + return libbpf_err(-ENOENT); } bpf_object__for_each_program(prog, obj) { @@ -8357,7 +8598,7 @@ err_unpin_programs: bpf_program__unpin(prog, buf); } - return err; + return libbpf_err(err); } int bpf_object__unpin_programs(struct bpf_object *obj, const char *path) @@ -8366,7 +8607,7 @@ int bpf_object__unpin_programs(struct bpf_object *obj, const char *path) int err; if (!obj) - return -ENOENT; + return libbpf_err(-ENOENT); bpf_object__for_each_program(prog, obj) { char buf[PATH_MAX]; @@ -8375,13 +8616,13 @@ int bpf_object__unpin_programs(struct bpf_object *obj, const char *path) len = snprintf(buf, PATH_MAX, "%s/%s", path, prog->pin_name); if (len < 0) - return -EINVAL; + return libbpf_err(-EINVAL); else if (len >= PATH_MAX) - return -ENAMETOOLONG; + return libbpf_err(-ENAMETOOLONG); err = bpf_program__unpin(prog, buf); if (err) - return err; + return libbpf_err(err); } return 0; @@ -8393,12 +8634,12 @@ int bpf_object__pin(struct bpf_object *obj, const char *path) err = bpf_object__pin_maps(obj, path); if (err) - return err; + return libbpf_err(err); err = bpf_object__pin_programs(obj, path); if (err) { bpf_object__unpin_maps(obj, path); - return err; + return libbpf_err(err); } return 0; @@ -8448,6 +8689,7 @@ void bpf_object__close(struct bpf_object *obj) if (obj->clear_priv) obj->clear_priv(obj, obj->priv); + bpf_gen__free(obj->gen_loader); bpf_object__elf_finish(obj); bpf_object__unload(obj); btf__free(obj->btf); @@ -8494,7 +8736,7 @@ bpf_object__next(struct bpf_object *prev) const char *bpf_object__name(const struct bpf_object *obj) { - return obj ? obj->name : ERR_PTR(-EINVAL); + return obj ? obj->name : libbpf_err_ptr(-EINVAL); } unsigned int bpf_object__kversion(const struct bpf_object *obj) @@ -8515,7 +8757,7 @@ int bpf_object__btf_fd(const struct bpf_object *obj) int bpf_object__set_kversion(struct bpf_object *obj, __u32 kern_version) { if (obj->loaded) - return -EINVAL; + return libbpf_err(-EINVAL); obj->kern_version = kern_version; @@ -8535,7 +8777,23 @@ int bpf_object__set_priv(struct bpf_object *obj, void *priv, void *bpf_object__priv(const struct bpf_object *obj) { - return obj ? obj->priv : ERR_PTR(-EINVAL); + return obj ? obj->priv : libbpf_err_ptr(-EINVAL); +} + +int bpf_object__gen_loader(struct bpf_object *obj, struct gen_loader_opts *opts) +{ + struct bpf_gen *gen; + + if (!opts) + return -EFAULT; + if (!OPTS_VALID(opts, gen_loader_opts)) + return -EINVAL; + gen = calloc(sizeof(*gen), 1); + if (!gen) + return -ENOMEM; + gen->opts = opts; + obj->gen_loader = gen; + return 0; } static struct bpf_program * @@ -8555,7 +8813,7 @@ __bpf_program__iter(const struct bpf_program *p, const struct bpf_object *obj, if (p->obj != obj) { pr_warn("error: program handler doesn't match object\n"); - return NULL; + return errno = EINVAL, NULL; } idx = (p - obj->programs) + (forward ? 1 : -1); @@ -8601,7 +8859,7 @@ int bpf_program__set_priv(struct bpf_program *prog, void *priv, void *bpf_program__priv(const struct bpf_program *prog) { - return prog ? prog->priv : ERR_PTR(-EINVAL); + return prog ? prog->priv : libbpf_err_ptr(-EINVAL); } void bpf_program__set_ifindex(struct bpf_program *prog, __u32 ifindex) @@ -8628,7 +8886,7 @@ const char *bpf_program__title(const struct bpf_program *prog, bool needs_copy) title = strdup(title); if (!title) { pr_warn("failed to strdup program title\n"); - return ERR_PTR(-ENOMEM); + return libbpf_err_ptr(-ENOMEM); } } @@ -8643,7 +8901,7 @@ bool bpf_program__autoload(const struct bpf_program *prog) int bpf_program__set_autoload(struct bpf_program *prog, bool autoload) { if (prog->obj->loaded) - return -EINVAL; + return libbpf_err(-EINVAL); prog->load = autoload; return 0; @@ -8665,17 +8923,17 @@ int bpf_program__set_prep(struct bpf_program *prog, int nr_instances, int *instances_fds; if (nr_instances <= 0 || !prep) - return -EINVAL; + return libbpf_err(-EINVAL); if (prog->instances.nr > 0 || prog->instances.fds) { pr_warn("Can't set pre-processor after loading\n"); - return -EINVAL; + return libbpf_err(-EINVAL); } instances_fds = malloc(sizeof(int) * nr_instances); if (!instances_fds) { pr_warn("alloc memory failed for fds\n"); - return -ENOMEM; + return libbpf_err(-ENOMEM); } /* fill all fd with -1 */ @@ -8692,19 +8950,19 @@ int bpf_program__nth_fd(const struct bpf_program *prog, int n) int fd; if (!prog) - return -EINVAL; + return libbpf_err(-EINVAL); if (n >= prog->instances.nr || n < 0) { pr_warn("Can't get the %dth fd from program %s: only %d instances\n", n, prog->name, prog->instances.nr); - return -EINVAL; + return libbpf_err(-EINVAL); } fd = prog->instances.fds[n]; if (fd < 0) { pr_warn("%dth instance of program '%s' is invalid\n", n, prog->name); - return -ENOENT; + return libbpf_err(-ENOENT); } return fd; @@ -8730,7 +8988,7 @@ static bool bpf_program__is_type(const struct bpf_program *prog, int bpf_program__set_##NAME(struct bpf_program *prog) \ { \ if (!prog) \ - return -EINVAL; \ + return libbpf_err(-EINVAL); \ bpf_program__set_type(prog, TYPE); \ return 0; \ } \ @@ -8820,7 +9078,10 @@ static struct bpf_link *attach_iter(const struct bpf_sec_def *sec, static const struct bpf_sec_def section_defs[] = { BPF_PROG_SEC("socket", BPF_PROG_TYPE_SOCKET_FILTER), - BPF_PROG_SEC("sk_reuseport", BPF_PROG_TYPE_SK_REUSEPORT), + BPF_EAPROG_SEC("sk_reuseport/migrate", BPF_PROG_TYPE_SK_REUSEPORT, + BPF_SK_REUSEPORT_SELECT_OR_MIGRATE), + BPF_EAPROG_SEC("sk_reuseport", BPF_PROG_TYPE_SK_REUSEPORT, + BPF_SK_REUSEPORT_SELECT), SEC_DEF("kprobe/", KPROBE, .attach_fn = attach_kprobe), BPF_PROG_SEC("uprobe/", BPF_PROG_TYPE_KPROBE), @@ -8884,6 +9145,8 @@ static const struct bpf_sec_def section_defs[] = { .expected_attach_type = BPF_TRACE_ITER, .is_attach_btf = true, .attach_fn = attach_iter), + SEC_DEF("syscall", SYSCALL, + .is_sleepable = true), BPF_EAPROG_SEC("xdp_devmap/", BPF_PROG_TYPE_XDP, BPF_XDP_DEVMAP), BPF_EAPROG_SEC("xdp_cpumap/", BPF_PROG_TYPE_XDP, @@ -9015,7 +9278,7 @@ int libbpf_prog_type_by_name(const char *name, enum bpf_prog_type *prog_type, char *type_names; if (!name) - return -EINVAL; + return libbpf_err(-EINVAL); sec_def = find_sec_def(name); if (sec_def) { @@ -9031,7 +9294,7 @@ int libbpf_prog_type_by_name(const char *name, enum bpf_prog_type *prog_type, free(type_names); } - return -ESRCH; + return libbpf_err(-ESRCH); } static struct bpf_map *find_struct_ops_map_by_offset(struct bpf_object *obj, @@ -9173,6 +9436,28 @@ invalid_prog: #define BTF_ITER_PREFIX "bpf_iter_" #define BTF_MAX_NAME_SIZE 128 +void btf_get_kernel_prefix_kind(enum bpf_attach_type attach_type, + const char **prefix, int *kind) +{ + switch (attach_type) { + case BPF_TRACE_RAW_TP: + *prefix = BTF_TRACE_PREFIX; + *kind = BTF_KIND_TYPEDEF; + break; + case BPF_LSM_MAC: + *prefix = BTF_LSM_PREFIX; + *kind = BTF_KIND_FUNC; + break; + case BPF_TRACE_ITER: + *prefix = BTF_ITER_PREFIX; + *kind = BTF_KIND_FUNC; + break; + default: + *prefix = ""; + *kind = BTF_KIND_FUNC; + } +} + static int find_btf_by_prefix_kind(const struct btf *btf, const char *prefix, const char *name, __u32 kind) { @@ -9193,21 +9478,11 @@ static int find_btf_by_prefix_kind(const struct btf *btf, const char *prefix, static inline int find_attach_btf_id(struct btf *btf, const char *name, enum bpf_attach_type attach_type) { - int err; + const char *prefix; + int kind; - if (attach_type == BPF_TRACE_RAW_TP) - err = find_btf_by_prefix_kind(btf, BTF_TRACE_PREFIX, name, - BTF_KIND_TYPEDEF); - else if (attach_type == BPF_LSM_MAC) - err = find_btf_by_prefix_kind(btf, BTF_LSM_PREFIX, name, - BTF_KIND_FUNC); - else if (attach_type == BPF_TRACE_ITER) - err = find_btf_by_prefix_kind(btf, BTF_ITER_PREFIX, name, - BTF_KIND_FUNC); - else - err = btf__find_by_name_kind(btf, name, BTF_KIND_FUNC); - - return err; + btf_get_kernel_prefix_kind(attach_type, &prefix, &kind); + return find_btf_by_prefix_kind(btf, prefix, name, kind); } int libbpf_find_vmlinux_btf_id(const char *name, @@ -9217,9 +9492,10 @@ int libbpf_find_vmlinux_btf_id(const char *name, int err; btf = libbpf_find_kernel_btf(); - if (IS_ERR(btf)) { + err = libbpf_get_error(btf); + if (err) { pr_warn("vmlinux BTF is not found\n"); - return -EINVAL; + return libbpf_err(err); } err = find_attach_btf_id(btf, name, attach_type); @@ -9227,7 +9503,7 @@ int libbpf_find_vmlinux_btf_id(const char *name, pr_warn("%s is not found in vmlinux BTF\n", name); btf__free(btf); - return err; + return libbpf_err(err); } static int libbpf_find_prog_btf_id(const char *name, __u32 attach_prog_fd) @@ -9238,10 +9514,11 @@ static int libbpf_find_prog_btf_id(const char *name, __u32 attach_prog_fd) int err = -EINVAL; info_linear = bpf_program__get_prog_info_linear(attach_prog_fd, 0); - if (IS_ERR_OR_NULL(info_linear)) { + err = libbpf_get_error(info_linear); + if (err) { pr_warn("failed get_prog_info_linear for FD %d\n", attach_prog_fd); - return -EINVAL; + return err; } info = &info_linear->info; if (!info->btf_id) { @@ -9306,7 +9583,7 @@ static int libbpf_find_attach_btf_id(struct bpf_program *prog, int *btf_obj_fd, __u32 attach_prog_fd = prog->attach_prog_fd; const char *name = prog->sec_name, *attach_name; const struct bpf_sec_def *sec = NULL; - int i, err; + int i, err = 0; if (!name) return -EINVAL; @@ -9341,7 +9618,13 @@ static int libbpf_find_attach_btf_id(struct bpf_program *prog, int *btf_obj_fd, } /* kernel/module BTF ID */ - err = find_kernel_btf_id(prog->obj, attach_name, attach_type, btf_obj_fd, btf_type_id); + if (prog->obj->gen_loader) { + bpf_gen__record_attach_target(prog->obj->gen_loader, attach_name, attach_type); + *btf_obj_fd = 0; + *btf_type_id = 1; + } else { + err = find_kernel_btf_id(prog->obj, attach_name, attach_type, btf_obj_fd, btf_type_id); + } if (err) { pr_warn("failed to find kernel BTF type ID of '%s': %d\n", attach_name, err); return err; @@ -9356,13 +9639,13 @@ int libbpf_attach_type_by_name(const char *name, int i; if (!name) - return -EINVAL; + return libbpf_err(-EINVAL); for (i = 0; i < ARRAY_SIZE(section_defs); i++) { if (strncmp(name, section_defs[i].sec, section_defs[i].len)) continue; if (!section_defs[i].is_attachable) - return -EINVAL; + return libbpf_err(-EINVAL); *attach_type = section_defs[i].expected_attach_type; return 0; } @@ -9373,17 +9656,17 @@ int libbpf_attach_type_by_name(const char *name, free(type_names); } - return -EINVAL; + return libbpf_err(-EINVAL); } int bpf_map__fd(const struct bpf_map *map) { - return map ? map->fd : -EINVAL; + return map ? map->fd : libbpf_err(-EINVAL); } const struct bpf_map_def *bpf_map__def(const struct bpf_map *map) { - return map ? &map->def : ERR_PTR(-EINVAL); + return map ? &map->def : libbpf_err_ptr(-EINVAL); } const char *bpf_map__name(const struct bpf_map *map) @@ -9399,7 +9682,7 @@ enum bpf_map_type bpf_map__type(const struct bpf_map *map) int bpf_map__set_type(struct bpf_map *map, enum bpf_map_type type) { if (map->fd >= 0) - return -EBUSY; + return libbpf_err(-EBUSY); map->def.type = type; return 0; } @@ -9412,7 +9695,7 @@ __u32 bpf_map__map_flags(const struct bpf_map *map) int bpf_map__set_map_flags(struct bpf_map *map, __u32 flags) { if (map->fd >= 0) - return -EBUSY; + return libbpf_err(-EBUSY); map->def.map_flags = flags; return 0; } @@ -9425,7 +9708,7 @@ __u32 bpf_map__numa_node(const struct bpf_map *map) int bpf_map__set_numa_node(struct bpf_map *map, __u32 numa_node) { if (map->fd >= 0) - return -EBUSY; + return libbpf_err(-EBUSY); map->numa_node = numa_node; return 0; } @@ -9438,7 +9721,7 @@ __u32 bpf_map__key_size(const struct bpf_map *map) int bpf_map__set_key_size(struct bpf_map *map, __u32 size) { if (map->fd >= 0) - return -EBUSY; + return libbpf_err(-EBUSY); map->def.key_size = size; return 0; } @@ -9451,7 +9734,7 @@ __u32 bpf_map__value_size(const struct bpf_map *map) int bpf_map__set_value_size(struct bpf_map *map, __u32 size) { if (map->fd >= 0) - return -EBUSY; + return libbpf_err(-EBUSY); map->def.value_size = size; return 0; } @@ -9470,7 +9753,7 @@ int bpf_map__set_priv(struct bpf_map *map, void *priv, bpf_map_clear_priv_t clear_priv) { if (!map) - return -EINVAL; + return libbpf_err(-EINVAL); if (map->priv) { if (map->clear_priv) @@ -9484,7 +9767,7 @@ int bpf_map__set_priv(struct bpf_map *map, void *priv, void *bpf_map__priv(const struct bpf_map *map) { - return map ? map->priv : ERR_PTR(-EINVAL); + return map ? map->priv : libbpf_err_ptr(-EINVAL); } int bpf_map__set_initial_value(struct bpf_map *map, @@ -9492,12 +9775,20 @@ int bpf_map__set_initial_value(struct bpf_map *map, { if (!map->mmaped || map->libbpf_type == LIBBPF_MAP_KCONFIG || size != map->def.value_size || map->fd >= 0) - return -EINVAL; + return libbpf_err(-EINVAL); memcpy(map->mmaped, data, size); return 0; } +const void *bpf_map__initial_value(struct bpf_map *map, size_t *psize) +{ + if (!map->mmaped) + return NULL; + *psize = map->def.value_size; + return map->mmaped; +} + bool bpf_map__is_offload_neutral(const struct bpf_map *map) { return map->def.type == BPF_MAP_TYPE_PERF_EVENT_ARRAY; @@ -9516,7 +9807,7 @@ __u32 bpf_map__ifindex(const struct bpf_map *map) int bpf_map__set_ifindex(struct bpf_map *map, __u32 ifindex) { if (map->fd >= 0) - return -EBUSY; + return libbpf_err(-EBUSY); map->map_ifindex = ifindex; return 0; } @@ -9525,11 +9816,11 @@ int bpf_map__set_inner_map_fd(struct bpf_map *map, int fd) { if (!bpf_map_type__is_map_in_map(map->def.type)) { pr_warn("error: unsupported map type\n"); - return -EINVAL; + return libbpf_err(-EINVAL); } if (map->inner_map_fd != -1) { pr_warn("error: inner_map_fd already specified\n"); - return -EINVAL; + return libbpf_err(-EINVAL); } zfree(&map->inner_map); map->inner_map_fd = fd; @@ -9543,7 +9834,7 @@ __bpf_map__iter(const struct bpf_map *m, const struct bpf_object *obj, int i) struct bpf_map *s, *e; if (!obj || !obj->maps) - return NULL; + return errno = EINVAL, NULL; s = obj->maps; e = obj->maps + obj->nr_maps; @@ -9551,7 +9842,7 @@ __bpf_map__iter(const struct bpf_map *m, const struct bpf_object *obj, int i) if ((m < s) || (m >= e)) { pr_warn("error in %s: map handler doesn't belong to object\n", __func__); - return NULL; + return errno = EINVAL, NULL; } idx = (m - obj->maps) + i; @@ -9590,7 +9881,7 @@ bpf_object__find_map_by_name(const struct bpf_object *obj, const char *name) if (pos->name && !strcmp(pos->name, name)) return pos; } - return NULL; + return errno = ENOENT, NULL; } int @@ -9602,12 +9893,23 @@ bpf_object__find_map_fd_by_name(const struct bpf_object *obj, const char *name) struct bpf_map * bpf_object__find_map_by_offset(struct bpf_object *obj, size_t offset) { - return ERR_PTR(-ENOTSUP); + return libbpf_err_ptr(-ENOTSUP); } long libbpf_get_error(const void *ptr) { - return PTR_ERR_OR_ZERO(ptr); + if (!IS_ERR_OR_NULL(ptr)) + return 0; + + if (IS_ERR(ptr)) + errno = -PTR_ERR(ptr); + + /* If ptr == NULL, then errno should be already set by the failing + * API, because libbpf never returns NULL on success and it now always + * sets errno on error. So no extra errno handling for ptr == NULL + * case. + */ + return -errno; } int bpf_prog_load(const char *file, enum bpf_prog_type type, @@ -9633,16 +9935,17 @@ int bpf_prog_load_xattr(const struct bpf_prog_load_attr *attr, int err; if (!attr) - return -EINVAL; + return libbpf_err(-EINVAL); if (!attr->file) - return -EINVAL; + return libbpf_err(-EINVAL); open_attr.file = attr->file; open_attr.prog_type = attr->prog_type; obj = bpf_object__open_xattr(&open_attr); - if (IS_ERR_OR_NULL(obj)) - return -ENOENT; + err = libbpf_get_error(obj); + if (err) + return libbpf_err(-ENOENT); bpf_object__for_each_program(prog, obj) { enum bpf_attach_type attach_type = attr->expected_attach_type; @@ -9662,7 +9965,7 @@ int bpf_prog_load_xattr(const struct bpf_prog_load_attr *attr, * didn't provide a fallback type, too bad... */ bpf_object__close(obj); - return -EINVAL; + return libbpf_err(-EINVAL); } prog->prog_ifindex = attr->ifindex; @@ -9680,13 +9983,13 @@ int bpf_prog_load_xattr(const struct bpf_prog_load_attr *attr, if (!first_prog) { pr_warn("object file doesn't contain bpf program\n"); bpf_object__close(obj); - return -ENOENT; + return libbpf_err(-ENOENT); } err = bpf_object__load(obj); if (err) { bpf_object__close(obj); - return err; + return libbpf_err(err); } *pobj = obj; @@ -9705,7 +10008,10 @@ struct bpf_link { /* Replace link's underlying BPF program with the new one */ int bpf_link__update_program(struct bpf_link *link, struct bpf_program *prog) { - return bpf_link_update(bpf_link__fd(link), bpf_program__fd(prog), NULL); + int ret; + + ret = bpf_link_update(bpf_link__fd(link), bpf_program__fd(prog), NULL); + return libbpf_err_errno(ret); } /* Release "ownership" of underlying BPF resource (typically, BPF program @@ -9738,7 +10044,7 @@ int bpf_link__destroy(struct bpf_link *link) free(link->pin_path); free(link); - return err; + return libbpf_err(err); } int bpf_link__fd(const struct bpf_link *link) @@ -9753,7 +10059,7 @@ const char *bpf_link__pin_path(const struct bpf_link *link) static int bpf_link__detach_fd(struct bpf_link *link) { - return close(link->fd); + return libbpf_err_errno(close(link->fd)); } struct bpf_link *bpf_link__open(const char *path) @@ -9765,13 +10071,13 @@ struct bpf_link *bpf_link__open(const char *path) if (fd < 0) { fd = -errno; pr_warn("failed to open link at %s: %d\n", path, fd); - return ERR_PTR(fd); + return libbpf_err_ptr(fd); } link = calloc(1, sizeof(*link)); if (!link) { close(fd); - return ERR_PTR(-ENOMEM); + return libbpf_err_ptr(-ENOMEM); } link->detach = &bpf_link__detach_fd; link->fd = fd; @@ -9779,7 +10085,7 @@ struct bpf_link *bpf_link__open(const char *path) link->pin_path = strdup(path); if (!link->pin_path) { bpf_link__destroy(link); - return ERR_PTR(-ENOMEM); + return libbpf_err_ptr(-ENOMEM); } return link; @@ -9795,22 +10101,22 @@ int bpf_link__pin(struct bpf_link *link, const char *path) int err; if (link->pin_path) - return -EBUSY; + return libbpf_err(-EBUSY); err = make_parent_dir(path); if (err) - return err; + return libbpf_err(err); err = check_path(path); if (err) - return err; + return libbpf_err(err); link->pin_path = strdup(path); if (!link->pin_path) - return -ENOMEM; + return libbpf_err(-ENOMEM); if (bpf_obj_pin(link->fd, link->pin_path)) { err = -errno; zfree(&link->pin_path); - return err; + return libbpf_err(err); } pr_debug("link fd=%d: pinned at %s\n", link->fd, link->pin_path); @@ -9822,11 +10128,11 @@ int bpf_link__unpin(struct bpf_link *link) int err; if (!link->pin_path) - return -EINVAL; + return libbpf_err(-EINVAL); err = unlink(link->pin_path); if (err != 0) - return -errno; + return libbpf_err_errno(err); pr_debug("link fd=%d: unpinned from %s\n", link->fd, link->pin_path); zfree(&link->pin_path); @@ -9842,11 +10148,10 @@ static int bpf_link__detach_perf_event(struct bpf_link *link) err = -errno; close(link->fd); - return err; + return libbpf_err(err); } -struct bpf_link *bpf_program__attach_perf_event(struct bpf_program *prog, - int pfd) +struct bpf_link *bpf_program__attach_perf_event(struct bpf_program *prog, int pfd) { char errmsg[STRERR_BUFSIZE]; struct bpf_link *link; @@ -9855,18 +10160,18 @@ struct bpf_link *bpf_program__attach_perf_event(struct bpf_program *prog, if (pfd < 0) { pr_warn("prog '%s': invalid perf event FD %d\n", prog->name, pfd); - return ERR_PTR(-EINVAL); + return libbpf_err_ptr(-EINVAL); } prog_fd = bpf_program__fd(prog); if (prog_fd < 0) { pr_warn("prog '%s': can't attach BPF program w/o FD (did you load it?)\n", prog->name); - return ERR_PTR(-EINVAL); + return libbpf_err_ptr(-EINVAL); } link = calloc(1, sizeof(*link)); if (!link) - return ERR_PTR(-ENOMEM); + return libbpf_err_ptr(-ENOMEM); link->detach = &bpf_link__detach_perf_event; link->fd = pfd; @@ -9878,14 +10183,14 @@ struct bpf_link *bpf_program__attach_perf_event(struct bpf_program *prog, if (err == -EPROTO) pr_warn("prog '%s': try add PERF_SAMPLE_CALLCHAIN to or remove exclude_callchain_[kernel|user] from pfd %d\n", prog->name, pfd); - return ERR_PTR(err); + return libbpf_err_ptr(err); } if (ioctl(pfd, PERF_EVENT_IOC_ENABLE, 0) < 0) { err = -errno; free(link); pr_warn("prog '%s': failed to enable pfd %d: %s\n", prog->name, pfd, libbpf_strerror_r(err, errmsg, sizeof(errmsg))); - return ERR_PTR(err); + return libbpf_err_ptr(err); } return link; } @@ -10009,16 +10314,16 @@ struct bpf_link *bpf_program__attach_kprobe(struct bpf_program *prog, pr_warn("prog '%s': failed to create %s '%s' perf event: %s\n", prog->name, retprobe ? "kretprobe" : "kprobe", func_name, libbpf_strerror_r(pfd, errmsg, sizeof(errmsg))); - return ERR_PTR(pfd); + return libbpf_err_ptr(pfd); } link = bpf_program__attach_perf_event(prog, pfd); - if (IS_ERR(link)) { + err = libbpf_get_error(link); + if (err) { close(pfd); - err = PTR_ERR(link); pr_warn("prog '%s': failed to attach to %s '%s': %s\n", prog->name, retprobe ? "kretprobe" : "kprobe", func_name, libbpf_strerror_r(err, errmsg, sizeof(errmsg))); - return link; + return libbpf_err_ptr(err); } return link; } @@ -10051,17 +10356,17 @@ struct bpf_link *bpf_program__attach_uprobe(struct bpf_program *prog, prog->name, retprobe ? "uretprobe" : "uprobe", binary_path, func_offset, libbpf_strerror_r(pfd, errmsg, sizeof(errmsg))); - return ERR_PTR(pfd); + return libbpf_err_ptr(pfd); } link = bpf_program__attach_perf_event(prog, pfd); - if (IS_ERR(link)) { + err = libbpf_get_error(link); + if (err) { close(pfd); - err = PTR_ERR(link); pr_warn("prog '%s': failed to attach to %s '%s:0x%zx': %s\n", prog->name, retprobe ? "uretprobe" : "uprobe", binary_path, func_offset, libbpf_strerror_r(err, errmsg, sizeof(errmsg))); - return link; + return libbpf_err_ptr(err); } return link; } @@ -10129,16 +10434,16 @@ struct bpf_link *bpf_program__attach_tracepoint(struct bpf_program *prog, pr_warn("prog '%s': failed to create tracepoint '%s/%s' perf event: %s\n", prog->name, tp_category, tp_name, libbpf_strerror_r(pfd, errmsg, sizeof(errmsg))); - return ERR_PTR(pfd); + return libbpf_err_ptr(pfd); } link = bpf_program__attach_perf_event(prog, pfd); - if (IS_ERR(link)) { + err = libbpf_get_error(link); + if (err) { close(pfd); - err = PTR_ERR(link); pr_warn("prog '%s': failed to attach to tracepoint '%s/%s': %s\n", prog->name, tp_category, tp_name, libbpf_strerror_r(err, errmsg, sizeof(errmsg))); - return link; + return libbpf_err_ptr(err); } return link; } @@ -10151,20 +10456,19 @@ static struct bpf_link *attach_tp(const struct bpf_sec_def *sec, sec_name = strdup(prog->sec_name); if (!sec_name) - return ERR_PTR(-ENOMEM); + return libbpf_err_ptr(-ENOMEM); /* extract "tp/<category>/<name>" */ tp_cat = sec_name + sec->len; tp_name = strchr(tp_cat, '/'); if (!tp_name) { - link = ERR_PTR(-EINVAL); - goto out; + free(sec_name); + return libbpf_err_ptr(-EINVAL); } *tp_name = '\0'; tp_name++; link = bpf_program__attach_tracepoint(prog, tp_cat, tp_name); -out: free(sec_name); return link; } @@ -10179,12 +10483,12 @@ struct bpf_link *bpf_program__attach_raw_tracepoint(struct bpf_program *prog, prog_fd = bpf_program__fd(prog); if (prog_fd < 0) { pr_warn("prog '%s': can't attach before loaded\n", prog->name); - return ERR_PTR(-EINVAL); + return libbpf_err_ptr(-EINVAL); } link = calloc(1, sizeof(*link)); if (!link) - return ERR_PTR(-ENOMEM); + return libbpf_err_ptr(-ENOMEM); link->detach = &bpf_link__detach_fd; pfd = bpf_raw_tracepoint_open(tp_name, prog_fd); @@ -10193,7 +10497,7 @@ struct bpf_link *bpf_program__attach_raw_tracepoint(struct bpf_program *prog, free(link); pr_warn("prog '%s': failed to attach to raw tracepoint '%s': %s\n", prog->name, tp_name, libbpf_strerror_r(pfd, errmsg, sizeof(errmsg))); - return ERR_PTR(pfd); + return libbpf_err_ptr(pfd); } link->fd = pfd; return link; @@ -10217,12 +10521,12 @@ static struct bpf_link *bpf_program__attach_btf_id(struct bpf_program *prog) prog_fd = bpf_program__fd(prog); if (prog_fd < 0) { pr_warn("prog '%s': can't attach before loaded\n", prog->name); - return ERR_PTR(-EINVAL); + return libbpf_err_ptr(-EINVAL); } link = calloc(1, sizeof(*link)); if (!link) - return ERR_PTR(-ENOMEM); + return libbpf_err_ptr(-ENOMEM); link->detach = &bpf_link__detach_fd; pfd = bpf_raw_tracepoint_open(NULL, prog_fd); @@ -10231,7 +10535,7 @@ static struct bpf_link *bpf_program__attach_btf_id(struct bpf_program *prog) free(link); pr_warn("prog '%s': failed to attach: %s\n", prog->name, libbpf_strerror_r(pfd, errmsg, sizeof(errmsg))); - return ERR_PTR(pfd); + return libbpf_err_ptr(pfd); } link->fd = pfd; return (struct bpf_link *)link; @@ -10259,12 +10563,6 @@ static struct bpf_link *attach_lsm(const struct bpf_sec_def *sec, return bpf_program__attach_lsm(prog); } -static struct bpf_link *attach_iter(const struct bpf_sec_def *sec, - struct bpf_program *prog) -{ - return bpf_program__attach_iter(prog, NULL); -} - static struct bpf_link * bpf_program__attach_fd(struct bpf_program *prog, int target_fd, int btf_id, const char *target_name) @@ -10279,12 +10577,12 @@ bpf_program__attach_fd(struct bpf_program *prog, int target_fd, int btf_id, prog_fd = bpf_program__fd(prog); if (prog_fd < 0) { pr_warn("prog '%s': can't attach before loaded\n", prog->name); - return ERR_PTR(-EINVAL); + return libbpf_err_ptr(-EINVAL); } link = calloc(1, sizeof(*link)); if (!link) - return ERR_PTR(-ENOMEM); + return libbpf_err_ptr(-ENOMEM); link->detach = &bpf_link__detach_fd; attach_type = bpf_program__get_expected_attach_type(prog); @@ -10295,7 +10593,7 @@ bpf_program__attach_fd(struct bpf_program *prog, int target_fd, int btf_id, pr_warn("prog '%s': failed to attach to %s: %s\n", prog->name, target_name, libbpf_strerror_r(link_fd, errmsg, sizeof(errmsg))); - return ERR_PTR(link_fd); + return libbpf_err_ptr(link_fd); } link->fd = link_fd; return link; @@ -10328,19 +10626,19 @@ struct bpf_link *bpf_program__attach_freplace(struct bpf_program *prog, if (!!target_fd != !!attach_func_name) { pr_warn("prog '%s': supply none or both of target_fd and attach_func_name\n", prog->name); - return ERR_PTR(-EINVAL); + return libbpf_err_ptr(-EINVAL); } if (prog->type != BPF_PROG_TYPE_EXT) { pr_warn("prog '%s': only BPF_PROG_TYPE_EXT can attach as freplace", prog->name); - return ERR_PTR(-EINVAL); + return libbpf_err_ptr(-EINVAL); } if (target_fd) { btf_id = libbpf_find_prog_btf_id(attach_func_name, target_fd); if (btf_id < 0) - return ERR_PTR(btf_id); + return libbpf_err_ptr(btf_id); return bpf_program__attach_fd(prog, target_fd, btf_id, "freplace"); } else { @@ -10362,7 +10660,7 @@ bpf_program__attach_iter(struct bpf_program *prog, __u32 target_fd = 0; if (!OPTS_VALID(opts, bpf_iter_attach_opts)) - return ERR_PTR(-EINVAL); + return libbpf_err_ptr(-EINVAL); link_create_opts.iter_info = OPTS_GET(opts, link_info, (void *)0); link_create_opts.iter_info_len = OPTS_GET(opts, link_info_len, 0); @@ -10370,12 +10668,12 @@ bpf_program__attach_iter(struct bpf_program *prog, prog_fd = bpf_program__fd(prog); if (prog_fd < 0) { pr_warn("prog '%s': can't attach before loaded\n", prog->name); - return ERR_PTR(-EINVAL); + return libbpf_err_ptr(-EINVAL); } link = calloc(1, sizeof(*link)); if (!link) - return ERR_PTR(-ENOMEM); + return libbpf_err_ptr(-ENOMEM); link->detach = &bpf_link__detach_fd; link_fd = bpf_link_create(prog_fd, target_fd, BPF_TRACE_ITER, @@ -10385,19 +10683,25 @@ bpf_program__attach_iter(struct bpf_program *prog, free(link); pr_warn("prog '%s': failed to attach to iterator: %s\n", prog->name, libbpf_strerror_r(link_fd, errmsg, sizeof(errmsg))); - return ERR_PTR(link_fd); + return libbpf_err_ptr(link_fd); } link->fd = link_fd; return link; } +static struct bpf_link *attach_iter(const struct bpf_sec_def *sec, + struct bpf_program *prog) +{ + return bpf_program__attach_iter(prog, NULL); +} + struct bpf_link *bpf_program__attach(struct bpf_program *prog) { const struct bpf_sec_def *sec_def; sec_def = find_sec_def(prog->sec_name); if (!sec_def || !sec_def->attach_fn) - return ERR_PTR(-ESRCH); + return libbpf_err_ptr(-ESRCH); return sec_def->attach_fn(sec_def, prog); } @@ -10420,11 +10724,11 @@ struct bpf_link *bpf_map__attach_struct_ops(struct bpf_map *map) int err; if (!bpf_map__is_struct_ops(map) || map->fd == -1) - return ERR_PTR(-EINVAL); + return libbpf_err_ptr(-EINVAL); link = calloc(1, sizeof(*link)); if (!link) - return ERR_PTR(-EINVAL); + return libbpf_err_ptr(-EINVAL); st_ops = map->st_ops; for (i = 0; i < btf_vlen(st_ops->type); i++) { @@ -10444,7 +10748,7 @@ struct bpf_link *bpf_map__attach_struct_ops(struct bpf_map *map) if (err) { err = -errno; free(link); - return ERR_PTR(err); + return libbpf_err_ptr(err); } link->detach = bpf_link__detach_struct_ops; @@ -10498,7 +10802,7 @@ bpf_perf_event_read_simple(void *mmap_mem, size_t mmap_size, size_t page_size, } ring_buffer_write_tail(header, data_tail); - return ret; + return libbpf_err(ret); } struct perf_buffer; @@ -10651,7 +10955,7 @@ struct perf_buffer *perf_buffer__new(int map_fd, size_t page_cnt, p.lost_cb = opts ? opts->lost_cb : NULL; p.ctx = opts ? opts->ctx : NULL; - return __perf_buffer__new(map_fd, page_cnt, &p); + return libbpf_ptr(__perf_buffer__new(map_fd, page_cnt, &p)); } struct perf_buffer * @@ -10667,7 +10971,7 @@ perf_buffer__new_raw(int map_fd, size_t page_cnt, p.cpus = opts->cpus; p.map_keys = opts->map_keys; - return __perf_buffer__new(map_fd, page_cnt, &p); + return libbpf_ptr(__perf_buffer__new(map_fd, page_cnt, &p)); } static struct perf_buffer *__perf_buffer__new(int map_fd, size_t page_cnt, @@ -10888,16 +11192,19 @@ int perf_buffer__poll(struct perf_buffer *pb, int timeout_ms) int i, cnt, err; cnt = epoll_wait(pb->epoll_fd, pb->events, pb->cpu_cnt, timeout_ms); + if (cnt < 0) + return libbpf_err_errno(cnt); + for (i = 0; i < cnt; i++) { struct perf_cpu_buf *cpu_buf = pb->events[i].data.ptr; err = perf_buffer__process_records(pb, cpu_buf); if (err) { pr_warn("error while processing records: %d\n", err); - return err; + return libbpf_err(err); } } - return cnt < 0 ? -errno : cnt; + return cnt; } /* Return number of PERF_EVENT_ARRAY map slots set up by this perf_buffer @@ -10918,11 +11225,11 @@ int perf_buffer__buffer_fd(const struct perf_buffer *pb, size_t buf_idx) struct perf_cpu_buf *cpu_buf; if (buf_idx >= pb->cpu_cnt) - return -EINVAL; + return libbpf_err(-EINVAL); cpu_buf = pb->cpu_bufs[buf_idx]; if (!cpu_buf) - return -ENOENT; + return libbpf_err(-ENOENT); return cpu_buf->fd; } @@ -10940,11 +11247,11 @@ int perf_buffer__consume_buffer(struct perf_buffer *pb, size_t buf_idx) struct perf_cpu_buf *cpu_buf; if (buf_idx >= pb->cpu_cnt) - return -EINVAL; + return libbpf_err(-EINVAL); cpu_buf = pb->cpu_bufs[buf_idx]; if (!cpu_buf) - return -ENOENT; + return libbpf_err(-ENOENT); return perf_buffer__process_records(pb, cpu_buf); } @@ -10962,7 +11269,7 @@ int perf_buffer__consume(struct perf_buffer *pb) err = perf_buffer__process_records(pb, cpu_buf); if (err) { pr_warn("perf_buffer: failed to process records in buffer #%d: %d\n", i, err); - return err; + return libbpf_err(err); } } return 0; @@ -11074,13 +11381,13 @@ bpf_program__get_prog_info_linear(int fd, __u64 arrays) void *ptr; if (arrays >> BPF_PROG_INFO_LAST_ARRAY) - return ERR_PTR(-EINVAL); + return libbpf_err_ptr(-EINVAL); /* step 1: get array dimensions */ err = bpf_obj_get_info_by_fd(fd, &info, &info_len); if (err) { pr_debug("can't get prog info: %s", strerror(errno)); - return ERR_PTR(-EFAULT); + return libbpf_err_ptr(-EFAULT); } /* step 2: calculate total size of all arrays */ @@ -11112,7 +11419,7 @@ bpf_program__get_prog_info_linear(int fd, __u64 arrays) data_len = roundup(data_len, sizeof(__u64)); info_linear = malloc(sizeof(struct bpf_prog_info_linear) + data_len); if (!info_linear) - return ERR_PTR(-ENOMEM); + return libbpf_err_ptr(-ENOMEM); /* step 4: fill data to info_linear->info */ info_linear->arrays = arrays; @@ -11144,7 +11451,7 @@ bpf_program__get_prog_info_linear(int fd, __u64 arrays) if (err) { pr_debug("can't get prog info: %s", strerror(errno)); free(info_linear); - return ERR_PTR(-EFAULT); + return libbpf_err_ptr(-EFAULT); } /* step 6: verify the data */ @@ -11223,26 +11530,26 @@ int bpf_program__set_attach_target(struct bpf_program *prog, int btf_obj_fd = 0, btf_id = 0, err; if (!prog || attach_prog_fd < 0 || !attach_func_name) - return -EINVAL; + return libbpf_err(-EINVAL); if (prog->obj->loaded) - return -EINVAL; + return libbpf_err(-EINVAL); if (attach_prog_fd) { btf_id = libbpf_find_prog_btf_id(attach_func_name, attach_prog_fd); if (btf_id < 0) - return btf_id; + return libbpf_err(btf_id); } else { /* load btf_vmlinux, if not yet */ err = bpf_object__load_vmlinux_btf(prog->obj, true); if (err) - return err; + return libbpf_err(err); err = find_kernel_btf_id(prog->obj, attach_func_name, prog->expected_attach_type, &btf_obj_fd, &btf_id); if (err) - return err; + return libbpf_err(err); } prog->attach_btf_id = btf_id; @@ -11341,7 +11648,7 @@ int libbpf_num_possible_cpus(void) err = parse_cpu_mask_file(fcpu, &mask, &n); if (err) - return err; + return libbpf_err(err); tmp_cpus = 0; for (i = 0; i < n; i++) { @@ -11361,7 +11668,7 @@ int bpf_object__open_skeleton(struct bpf_object_skeleton *s, .object_name = s->name, ); struct bpf_object *obj; - int i; + int i, err; /* Attempt to preserve opts->object_name, unless overriden by user * explicitly. Overwriting object name for skeletons is discouraged, @@ -11376,10 +11683,11 @@ int bpf_object__open_skeleton(struct bpf_object_skeleton *s, } obj = bpf_object__open_mem(s->data, s->data_sz, &skel_opts); - if (IS_ERR(obj)) { - pr_warn("failed to initialize skeleton BPF object '%s': %ld\n", - s->name, PTR_ERR(obj)); - return PTR_ERR(obj); + err = libbpf_get_error(obj); + if (err) { + pr_warn("failed to initialize skeleton BPF object '%s': %d\n", + s->name, err); + return libbpf_err(err); } *s->obj = obj; @@ -11392,7 +11700,7 @@ int bpf_object__open_skeleton(struct bpf_object_skeleton *s, *map = bpf_object__find_map_by_name(obj, name); if (!*map) { pr_warn("failed to find skeleton map '%s'\n", name); - return -ESRCH; + return libbpf_err(-ESRCH); } /* externs shouldn't be pre-setup from user code */ @@ -11407,7 +11715,7 @@ int bpf_object__open_skeleton(struct bpf_object_skeleton *s, *prog = bpf_object__find_program_by_name(obj, name); if (!*prog) { pr_warn("failed to find skeleton program '%s'\n", name); - return -ESRCH; + return libbpf_err(-ESRCH); } } @@ -11421,7 +11729,7 @@ int bpf_object__load_skeleton(struct bpf_object_skeleton *s) err = bpf_object__load(*s->obj); if (err) { pr_warn("failed to load BPF skeleton '%s': %d\n", s->name, err); - return err; + return libbpf_err(err); } for (i = 0; i < s->map_cnt; i++) { @@ -11460,7 +11768,7 @@ int bpf_object__load_skeleton(struct bpf_object_skeleton *s) *mmaped = NULL; pr_warn("failed to re-mmap() map '%s': %d\n", bpf_map__name(map), err); - return err; + return libbpf_err(err); } } @@ -11469,7 +11777,7 @@ int bpf_object__load_skeleton(struct bpf_object_skeleton *s) int bpf_object__attach_skeleton(struct bpf_object_skeleton *s) { - int i; + int i, err; for (i = 0; i < s->prog_cnt; i++) { struct bpf_program *prog = *s->progs[i].prog; @@ -11484,10 +11792,11 @@ int bpf_object__attach_skeleton(struct bpf_object_skeleton *s) continue; *link = sec_def->attach_fn(sec_def, prog); - if (IS_ERR(*link)) { - pr_warn("failed to auto-attach program '%s': %ld\n", - bpf_program__name(prog), PTR_ERR(*link)); - return PTR_ERR(*link); + err = libbpf_get_error(*link); + if (err) { + pr_warn("failed to auto-attach program '%s': %d\n", + bpf_program__name(prog), err); + return libbpf_err(err); } } diff --git a/tools/lib/bpf/libbpf.h b/tools/lib/bpf/libbpf.h index bec4e6a6e31d..6e61342ba56c 100644 --- a/tools/lib/bpf/libbpf.h +++ b/tools/lib/bpf/libbpf.h @@ -18,6 +18,7 @@ #include <linux/bpf.h> #include "libbpf_common.h" +#include "libbpf_legacy.h" #ifdef __cplusplus extern "C" { @@ -471,6 +472,7 @@ LIBBPF_API int bpf_map__set_priv(struct bpf_map *map, void *priv, LIBBPF_API void *bpf_map__priv(const struct bpf_map *map); LIBBPF_API int bpf_map__set_initial_value(struct bpf_map *map, const void *data, size_t size); +LIBBPF_API const void *bpf_map__initial_value(struct bpf_map *map, size_t *psize); LIBBPF_API bool bpf_map__is_offload_neutral(const struct bpf_map *map); LIBBPF_API bool bpf_map__is_internal(const struct bpf_map *map); LIBBPF_API int bpf_map__set_pin_path(struct bpf_map *map, const char *path); @@ -498,6 +500,7 @@ LIBBPF_API int bpf_prog_load_xattr(const struct bpf_prog_load_attr *attr, LIBBPF_API int bpf_prog_load(const char *file, enum bpf_prog_type type, struct bpf_object **pobj, int *prog_fd); +/* XDP related API */ struct xdp_link_info { __u32 prog_id; __u32 drv_prog_id; @@ -520,6 +523,49 @@ LIBBPF_API int bpf_get_link_xdp_id(int ifindex, __u32 *prog_id, __u32 flags); LIBBPF_API int bpf_get_link_xdp_info(int ifindex, struct xdp_link_info *info, size_t info_size, __u32 flags); +/* TC related API */ +enum bpf_tc_attach_point { + BPF_TC_INGRESS = 1 << 0, + BPF_TC_EGRESS = 1 << 1, + BPF_TC_CUSTOM = 1 << 2, +}; + +#define BPF_TC_PARENT(a, b) \ + ((((a) << 16) & 0xFFFF0000U) | ((b) & 0x0000FFFFU)) + +enum bpf_tc_flags { + BPF_TC_F_REPLACE = 1 << 0, +}; + +struct bpf_tc_hook { + size_t sz; + int ifindex; + enum bpf_tc_attach_point attach_point; + __u32 parent; + size_t :0; +}; +#define bpf_tc_hook__last_field parent + +struct bpf_tc_opts { + size_t sz; + int prog_fd; + __u32 flags; + __u32 prog_id; + __u32 handle; + __u32 priority; + size_t :0; +}; +#define bpf_tc_opts__last_field priority + +LIBBPF_API int bpf_tc_hook_create(struct bpf_tc_hook *hook); +LIBBPF_API int bpf_tc_hook_destroy(struct bpf_tc_hook *hook); +LIBBPF_API int bpf_tc_attach(const struct bpf_tc_hook *hook, + struct bpf_tc_opts *opts); +LIBBPF_API int bpf_tc_detach(const struct bpf_tc_hook *hook, + const struct bpf_tc_opts *opts); +LIBBPF_API int bpf_tc_query(const struct bpf_tc_hook *hook, + struct bpf_tc_opts *opts); + /* Ring buffer APIs */ struct ring_buffer; @@ -756,6 +802,18 @@ LIBBPF_API int bpf_object__attach_skeleton(struct bpf_object_skeleton *s); LIBBPF_API void bpf_object__detach_skeleton(struct bpf_object_skeleton *s); LIBBPF_API void bpf_object__destroy_skeleton(struct bpf_object_skeleton *s); +struct gen_loader_opts { + size_t sz; /* size of this struct, for forward/backward compatiblity */ + const char *data; + const char *insns; + __u32 data_sz; + __u32 insns_sz; +}; + +#define gen_loader_opts__last_field insns_sz +LIBBPF_API int bpf_object__gen_loader(struct bpf_object *obj, + struct gen_loader_opts *opts); + enum libbpf_tristate { TRI_NO = 0, TRI_YES = 1, @@ -768,10 +826,18 @@ struct bpf_linker_opts { }; #define bpf_linker_opts__last_field sz +struct bpf_linker_file_opts { + /* size of this struct, for forward/backward compatiblity */ + size_t sz; +}; +#define bpf_linker_file_opts__last_field sz + struct bpf_linker; LIBBPF_API struct bpf_linker *bpf_linker__new(const char *filename, struct bpf_linker_opts *opts); -LIBBPF_API int bpf_linker__add_file(struct bpf_linker *linker, const char *filename); +LIBBPF_API int bpf_linker__add_file(struct bpf_linker *linker, + const char *filename, + const struct bpf_linker_file_opts *opts); LIBBPF_API int bpf_linker__finalize(struct bpf_linker *linker); LIBBPF_API void bpf_linker__free(struct bpf_linker *linker); diff --git a/tools/lib/bpf/libbpf.map b/tools/lib/bpf/libbpf.map index b9b29baf1df8..944c99d1ded3 100644 --- a/tools/lib/bpf/libbpf.map +++ b/tools/lib/bpf/libbpf.map @@ -361,4 +361,17 @@ LIBBPF_0.4.0 { bpf_linker__new; bpf_map__inner_map; bpf_object__set_kversion; + bpf_tc_attach; + bpf_tc_detach; + bpf_tc_hook_create; + bpf_tc_hook_destroy; + bpf_tc_query; } LIBBPF_0.3.0; + +LIBBPF_0.5.0 { + global: + bpf_map__initial_value; + bpf_map_lookup_and_delete_elem_flags; + bpf_object__gen_loader; + libbpf_set_strict_mode; +} LIBBPF_0.4.0; diff --git a/tools/lib/bpf/libbpf_errno.c b/tools/lib/bpf/libbpf_errno.c index 0afb51f7a919..96f67a772a1b 100644 --- a/tools/lib/bpf/libbpf_errno.c +++ b/tools/lib/bpf/libbpf_errno.c @@ -12,6 +12,7 @@ #include <string.h> #include "libbpf.h" +#include "libbpf_internal.h" /* make sure libbpf doesn't use kernel-only integer typedefs */ #pragma GCC poison u8 u16 u32 u64 s8 s16 s32 s64 @@ -39,7 +40,7 @@ static const char *libbpf_strerror_table[NR_ERRNO] = { int libbpf_strerror(int err, char *buf, size_t size) { if (!buf || !size) - return -1; + return libbpf_err(-EINVAL); err = err > 0 ? err : -err; @@ -48,7 +49,7 @@ int libbpf_strerror(int err, char *buf, size_t size) ret = strerror_r(err, buf, size); buf[size - 1] = '\0'; - return ret; + return libbpf_err_errno(ret); } if (err < __LIBBPF_ERRNO__END) { @@ -62,5 +63,5 @@ int libbpf_strerror(int err, char *buf, size_t size) snprintf(buf, size, "Unknown libbpf error %d", err); buf[size - 1] = '\0'; - return -1; + return libbpf_err(-ENOENT); } diff --git a/tools/lib/bpf/libbpf_internal.h b/tools/lib/bpf/libbpf_internal.h index ee426226928f..016ca7cb4f8a 100644 --- a/tools/lib/bpf/libbpf_internal.h +++ b/tools/lib/bpf/libbpf_internal.h @@ -11,6 +11,9 @@ #include <stdlib.h> #include <limits.h> +#include <errno.h> +#include <linux/err.h> +#include "libbpf_legacy.h" /* make sure libbpf doesn't use kernel-only integer typedefs */ #pragma GCC poison u8 u16 u32 u64 s8 s16 s32 s64 @@ -28,6 +31,12 @@ #ifndef R_BPF_64_64 #define R_BPF_64_64 1 #endif +#ifndef R_BPF_64_ABS64 +#define R_BPF_64_ABS64 2 +#endif +#ifndef R_BPF_64_ABS32 +#define R_BPF_64_ABS32 3 +#endif #ifndef R_BPF_64_32 #define R_BPF_64_32 10 #endif @@ -41,6 +50,11 @@ #define ELF_C_READ_MMAP ELF_C_READ #endif +/* Older libelf all end up in this expression, for both 32 and 64 bit */ +#ifndef GELF_ST_VISIBILITY +#define GELF_ST_VISIBILITY(o) ((o) & 0x03) +#endif + #define BTF_INFO_ENC(kind, kind_flag, vlen) \ ((!!(kind_flag) << 31) | ((kind) << 24) | ((vlen) & BTF_MAX_VLEN)) #define BTF_TYPE_ENC(name, info, size_or_type) (name), (info), (size_or_type) @@ -258,6 +272,8 @@ int bpf_object__section_size(const struct bpf_object *obj, const char *name, int bpf_object__variable_offset(const struct bpf_object *obj, const char *name, __u32 *off); struct btf *btf_get_from_fd(int btf_fd, struct btf *base_btf); +void btf_get_kernel_prefix_kind(enum bpf_attach_type attach_type, + const char **prefix, int *kind); struct btf_ext_info { /* @@ -428,4 +444,54 @@ int btf_type_visit_str_offs(struct btf_type *t, str_off_visit_fn visit, void *ct int btf_ext_visit_type_ids(struct btf_ext *btf_ext, type_id_visit_fn visit, void *ctx); int btf_ext_visit_str_offs(struct btf_ext *btf_ext, str_off_visit_fn visit, void *ctx); +extern enum libbpf_strict_mode libbpf_mode; + +/* handle direct returned errors */ +static inline int libbpf_err(int ret) +{ + if (ret < 0) + errno = -ret; + return ret; +} + +/* handle errno-based (e.g., syscall or libc) errors according to libbpf's + * strict mode settings + */ +static inline int libbpf_err_errno(int ret) +{ + if (libbpf_mode & LIBBPF_STRICT_DIRECT_ERRS) + /* errno is already assumed to be set on error */ + return ret < 0 ? -errno : ret; + + /* legacy: on error return -1 directly and don't touch errno */ + return ret; +} + +/* handle error for pointer-returning APIs, err is assumed to be < 0 always */ +static inline void *libbpf_err_ptr(int err) +{ + /* set errno on error, this doesn't break anything */ + errno = -err; + + if (libbpf_mode & LIBBPF_STRICT_CLEAN_PTRS) + return NULL; + + /* legacy: encode err as ptr */ + return ERR_PTR(err); +} + +/* handle pointer-returning APIs' error handling */ +static inline void *libbpf_ptr(void *ret) +{ + /* set errno on error, this doesn't break anything */ + if (IS_ERR(ret)) + errno = -PTR_ERR(ret); + + if (libbpf_mode & LIBBPF_STRICT_CLEAN_PTRS) + return IS_ERR(ret) ? NULL : ret; + + /* legacy: pass-through original pointer */ + return ret; +} + #endif /* __LIBBPF_LIBBPF_INTERNAL_H */ diff --git a/tools/lib/bpf/libbpf_legacy.h b/tools/lib/bpf/libbpf_legacy.h new file mode 100644 index 000000000000..df0d03dcffab --- /dev/null +++ b/tools/lib/bpf/libbpf_legacy.h @@ -0,0 +1,59 @@ +/* SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) */ + +/* + * Libbpf legacy APIs (either discouraged or deprecated, as mentioned in [0]) + * + * [0] https://docs.google.com/document/d/1UyjTZuPFWiPFyKk1tV5an11_iaRuec6U-ZESZ54nNTY + * + * Copyright (C) 2021 Facebook + */ +#ifndef __LIBBPF_LEGACY_BPF_H +#define __LIBBPF_LEGACY_BPF_H + +#include <linux/bpf.h> +#include <stdbool.h> +#include <stddef.h> +#include <stdint.h> +#include "libbpf_common.h" + +#ifdef __cplusplus +extern "C" { +#endif + +enum libbpf_strict_mode { + /* Turn on all supported strict features of libbpf to simulate libbpf + * v1.0 behavior. + * This will be the default behavior in libbpf v1.0. + */ + LIBBPF_STRICT_ALL = 0xffffffff, + + /* + * Disable any libbpf 1.0 behaviors. This is the default before libbpf + * v1.0. It won't be supported anymore in v1.0, please update your + * code so that it handles LIBBPF_STRICT_ALL mode before libbpf v1.0. + */ + LIBBPF_STRICT_NONE = 0x00, + /* + * Return NULL pointers on error, not ERR_PTR(err). + * Additionally, libbpf also always sets errno to corresponding Exx + * (positive) error code. + */ + LIBBPF_STRICT_CLEAN_PTRS = 0x01, + /* + * Return actual error codes from low-level APIs directly, not just -1. + * Additionally, libbpf also always sets errno to corresponding Exx + * (positive) error code. + */ + LIBBPF_STRICT_DIRECT_ERRS = 0x02, + + __LIBBPF_STRICT_LAST, +}; + +LIBBPF_API int libbpf_set_strict_mode(enum libbpf_strict_mode mode); + + +#ifdef __cplusplus +} /* extern "C" */ +#endif + +#endif /* __LIBBPF_LEGACY_BPF_H */ diff --git a/tools/lib/bpf/linker.c b/tools/lib/bpf/linker.c index 9de084b1c699..10911a8cad0f 100644 --- a/tools/lib/bpf/linker.c +++ b/tools/lib/bpf/linker.c @@ -158,7 +158,9 @@ struct bpf_linker { static int init_output_elf(struct bpf_linker *linker, const char *file); -static int linker_load_obj_file(struct bpf_linker *linker, const char *filename, struct src_obj *obj); +static int linker_load_obj_file(struct bpf_linker *linker, const char *filename, + const struct bpf_linker_file_opts *opts, + struct src_obj *obj); static int linker_sanity_check_elf(struct src_obj *obj); static int linker_sanity_check_elf_symtab(struct src_obj *obj, struct src_sec *sec); static int linker_sanity_check_elf_relos(struct src_obj *obj, struct src_sec *sec); @@ -218,16 +220,16 @@ struct bpf_linker *bpf_linker__new(const char *filename, struct bpf_linker_opts int err; if (!OPTS_VALID(opts, bpf_linker_opts)) - return NULL; + return errno = EINVAL, NULL; if (elf_version(EV_CURRENT) == EV_NONE) { pr_warn_elf("libelf initialization failed"); - return NULL; + return errno = EINVAL, NULL; } linker = calloc(1, sizeof(*linker)); if (!linker) - return NULL; + return errno = ENOMEM, NULL; linker->fd = -1; @@ -239,7 +241,7 @@ struct bpf_linker *bpf_linker__new(const char *filename, struct bpf_linker_opts err_out: bpf_linker__free(linker); - return NULL; + return errno = -err, NULL; } static struct dst_sec *add_dst_sec(struct bpf_linker *linker, const char *sec_name) @@ -435,15 +437,19 @@ static int init_output_elf(struct bpf_linker *linker, const char *file) return 0; } -int bpf_linker__add_file(struct bpf_linker *linker, const char *filename) +int bpf_linker__add_file(struct bpf_linker *linker, const char *filename, + const struct bpf_linker_file_opts *opts) { struct src_obj obj = {}; int err = 0; + if (!OPTS_VALID(opts, bpf_linker_file_opts)) + return libbpf_err(-EINVAL); + if (!linker->elf) - return -EINVAL; + return libbpf_err(-EINVAL); - err = err ?: linker_load_obj_file(linker, filename, &obj); + err = err ?: linker_load_obj_file(linker, filename, opts, &obj); err = err ?: linker_append_sec_data(linker, &obj); err = err ?: linker_append_elf_syms(linker, &obj); err = err ?: linker_append_elf_relos(linker, &obj); @@ -461,7 +467,7 @@ int bpf_linker__add_file(struct bpf_linker *linker, const char *filename) if (obj.fd >= 0) close(obj.fd); - return err; + return libbpf_err(err); } static bool is_dwarf_sec_name(const char *name) @@ -529,7 +535,9 @@ static struct src_sec *add_src_sec(struct src_obj *obj, const char *sec_name) return sec; } -static int linker_load_obj_file(struct bpf_linker *linker, const char *filename, struct src_obj *obj) +static int linker_load_obj_file(struct bpf_linker *linker, const char *filename, + const struct bpf_linker_file_opts *opts, + struct src_obj *obj) { #if __BYTE_ORDER == __LITTLE_ENDIAN const int host_endianness = ELFDATA2LSB; @@ -884,7 +892,8 @@ static int linker_sanity_check_elf_relos(struct src_obj *obj, struct src_sec *se size_t sym_idx = ELF64_R_SYM(relo->r_info); size_t sym_type = ELF64_R_TYPE(relo->r_info); - if (sym_type != R_BPF_64_64 && sym_type != R_BPF_64_32) { + if (sym_type != R_BPF_64_64 && sym_type != R_BPF_64_32 && + sym_type != R_BPF_64_ABS64 && sym_type != R_BPF_64_ABS32) { pr_warn("ELF relo #%d in section #%zu has unexpected type %zu in %s\n", i, sec->sec_idx, sym_type, obj->filename); return -EINVAL; @@ -1780,7 +1789,7 @@ static void sym_update_visibility(Elf64_Sym *sym, int sym_vis) /* libelf doesn't provide setters for ST_VISIBILITY, * but it is stored in the lower 2 bits of st_other */ - sym->st_other &= 0x03; + sym->st_other &= ~0x03; sym->st_other |= sym_vis; } @@ -2539,11 +2548,11 @@ int bpf_linker__finalize(struct bpf_linker *linker) int err, i; if (!linker->elf) - return -EINVAL; + return libbpf_err(-EINVAL); err = finalize_btf(linker); if (err) - return err; + return libbpf_err(err); /* Finalize strings */ strs_sz = strset__data_size(linker->strtab_strs); @@ -2575,14 +2584,14 @@ int bpf_linker__finalize(struct bpf_linker *linker) if (elf_update(linker->elf, ELF_C_NULL) < 0) { err = -errno; pr_warn_elf("failed to finalize ELF layout"); - return err; + return libbpf_err(err); } /* Write out final ELF contents */ if (elf_update(linker->elf, ELF_C_WRITE) < 0) { err = -errno; pr_warn_elf("failed to write ELF contents"); - return err; + return libbpf_err(err); } elf_end(linker->elf); diff --git a/tools/lib/bpf/netlink.c b/tools/lib/bpf/netlink.c index d2cb28e9ef52..cf9381f03b16 100644 --- a/tools/lib/bpf/netlink.c +++ b/tools/lib/bpf/netlink.c @@ -4,7 +4,10 @@ #include <stdlib.h> #include <memory.h> #include <unistd.h> +#include <arpa/inet.h> #include <linux/bpf.h> +#include <linux/if_ether.h> +#include <linux/pkt_cls.h> #include <linux/rtnetlink.h> #include <sys/socket.h> #include <errno.h> @@ -73,9 +76,20 @@ cleanup: return ret; } -static int bpf_netlink_recv(int sock, __u32 nl_pid, int seq, - __dump_nlmsg_t _fn, libbpf_dump_nlmsg_t fn, - void *cookie) +static void libbpf_netlink_close(int sock) +{ + close(sock); +} + +enum { + NL_CONT, + NL_NEXT, + NL_DONE, +}; + +static int libbpf_netlink_recv(int sock, __u32 nl_pid, int seq, + __dump_nlmsg_t _fn, libbpf_dump_nlmsg_t fn, + void *cookie) { bool multipart = true; struct nlmsgerr *err; @@ -84,6 +98,7 @@ static int bpf_netlink_recv(int sock, __u32 nl_pid, int seq, int len, ret; while (multipart) { +start: multipart = false; len = recv(sock, buf, sizeof(buf), 0); if (len < 0) { @@ -121,8 +136,16 @@ static int bpf_netlink_recv(int sock, __u32 nl_pid, int seq, } if (_fn) { ret = _fn(nh, fn, cookie); - if (ret) + switch (ret) { + case NL_CONT: + break; + case NL_NEXT: + goto start; + case NL_DONE: + return 0; + default: return ret; + } } } } @@ -131,95 +154,97 @@ done: return ret; } +static int libbpf_netlink_send_recv(struct nlmsghdr *nh, + __dump_nlmsg_t parse_msg, + libbpf_dump_nlmsg_t parse_attr, + void *cookie) +{ + __u32 nl_pid = 0; + int sock, ret; + + sock = libbpf_netlink_open(&nl_pid); + if (sock < 0) + return sock; + + nh->nlmsg_pid = 0; + nh->nlmsg_seq = time(NULL); + + if (send(sock, nh, nh->nlmsg_len, 0) < 0) { + ret = -errno; + goto out; + } + + ret = libbpf_netlink_recv(sock, nl_pid, nh->nlmsg_seq, + parse_msg, parse_attr, cookie); +out: + libbpf_netlink_close(sock); + return ret; +} + static int __bpf_set_link_xdp_fd_replace(int ifindex, int fd, int old_fd, __u32 flags) { - int sock, seq = 0, ret; - struct nlattr *nla, *nla_xdp; + struct nlattr *nla; + int ret; struct { struct nlmsghdr nh; struct ifinfomsg ifinfo; char attrbuf[64]; } req; - __u32 nl_pid = 0; - - sock = libbpf_netlink_open(&nl_pid); - if (sock < 0) - return sock; memset(&req, 0, sizeof(req)); - req.nh.nlmsg_len = NLMSG_LENGTH(sizeof(struct ifinfomsg)); - req.nh.nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK; - req.nh.nlmsg_type = RTM_SETLINK; - req.nh.nlmsg_pid = 0; - req.nh.nlmsg_seq = ++seq; + req.nh.nlmsg_len = NLMSG_LENGTH(sizeof(struct ifinfomsg)); + req.nh.nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK; + req.nh.nlmsg_type = RTM_SETLINK; req.ifinfo.ifi_family = AF_UNSPEC; - req.ifinfo.ifi_index = ifindex; + req.ifinfo.ifi_index = ifindex; - /* started nested attribute for XDP */ - nla = (struct nlattr *)(((char *)&req) - + NLMSG_ALIGN(req.nh.nlmsg_len)); - nla->nla_type = NLA_F_NESTED | IFLA_XDP; - nla->nla_len = NLA_HDRLEN; - - /* add XDP fd */ - nla_xdp = (struct nlattr *)((char *)nla + nla->nla_len); - nla_xdp->nla_type = IFLA_XDP_FD; - nla_xdp->nla_len = NLA_HDRLEN + sizeof(int); - memcpy((char *)nla_xdp + NLA_HDRLEN, &fd, sizeof(fd)); - nla->nla_len += nla_xdp->nla_len; - - /* if user passed in any flags, add those too */ + nla = nlattr_begin_nested(&req.nh, sizeof(req), IFLA_XDP); + if (!nla) + return -EMSGSIZE; + ret = nlattr_add(&req.nh, sizeof(req), IFLA_XDP_FD, &fd, sizeof(fd)); + if (ret < 0) + return ret; if (flags) { - nla_xdp = (struct nlattr *)((char *)nla + nla->nla_len); - nla_xdp->nla_type = IFLA_XDP_FLAGS; - nla_xdp->nla_len = NLA_HDRLEN + sizeof(flags); - memcpy((char *)nla_xdp + NLA_HDRLEN, &flags, sizeof(flags)); - nla->nla_len += nla_xdp->nla_len; + ret = nlattr_add(&req.nh, sizeof(req), IFLA_XDP_FLAGS, &flags, + sizeof(flags)); + if (ret < 0) + return ret; } - if (flags & XDP_FLAGS_REPLACE) { - nla_xdp = (struct nlattr *)((char *)nla + nla->nla_len); - nla_xdp->nla_type = IFLA_XDP_EXPECTED_FD; - nla_xdp->nla_len = NLA_HDRLEN + sizeof(old_fd); - memcpy((char *)nla_xdp + NLA_HDRLEN, &old_fd, sizeof(old_fd)); - nla->nla_len += nla_xdp->nla_len; - } - - req.nh.nlmsg_len += NLA_ALIGN(nla->nla_len); - - if (send(sock, &req, req.nh.nlmsg_len, 0) < 0) { - ret = -errno; - goto cleanup; + ret = nlattr_add(&req.nh, sizeof(req), IFLA_XDP_EXPECTED_FD, + &old_fd, sizeof(old_fd)); + if (ret < 0) + return ret; } - ret = bpf_netlink_recv(sock, nl_pid, seq, NULL, NULL, NULL); + nlattr_end_nested(&req.nh, nla); -cleanup: - close(sock); - return ret; + return libbpf_netlink_send_recv(&req.nh, NULL, NULL, NULL); } int bpf_set_link_xdp_fd_opts(int ifindex, int fd, __u32 flags, const struct bpf_xdp_set_link_opts *opts) { - int old_fd = -1; + int old_fd = -1, ret; if (!OPTS_VALID(opts, bpf_xdp_set_link_opts)) - return -EINVAL; + return libbpf_err(-EINVAL); if (OPTS_HAS(opts, old_fd)) { old_fd = OPTS_GET(opts, old_fd, -1); flags |= XDP_FLAGS_REPLACE; } - return __bpf_set_link_xdp_fd_replace(ifindex, fd, - old_fd, - flags); + ret = __bpf_set_link_xdp_fd_replace(ifindex, fd, old_fd, flags); + return libbpf_err(ret); } int bpf_set_link_xdp_fd(int ifindex, int fd, __u32 flags) { - return __bpf_set_link_xdp_fd_replace(ifindex, fd, 0, flags); + int ret; + + ret = __bpf_set_link_xdp_fd_replace(ifindex, fd, 0, flags); + return libbpf_err(ret); } static int __dump_link_nlmsg(struct nlmsghdr *nlh, @@ -231,6 +256,7 @@ static int __dump_link_nlmsg(struct nlmsghdr *nlh, len = nlh->nlmsg_len - NLMSG_LENGTH(sizeof(*ifi)); attr = (struct nlattr *) ((void *) ifi + NLMSG_ALIGN(sizeof(*ifi))); + if (libbpf_nla_parse(tb, IFLA_MAX, attr, len, NULL) != 0) return -LIBBPF_ERRNO__NLPARSE; @@ -282,34 +308,36 @@ static int get_xdp_info(void *cookie, void *msg, struct nlattr **tb) return 0; } -static int libbpf_nl_get_link(int sock, unsigned int nl_pid, - libbpf_dump_nlmsg_t dump_link_nlmsg, void *cookie); - int bpf_get_link_xdp_info(int ifindex, struct xdp_link_info *info, size_t info_size, __u32 flags) { struct xdp_id_md xdp_id = {}; - int sock, ret; - __u32 nl_pid = 0; __u32 mask; + int ret; + struct { + struct nlmsghdr nh; + struct ifinfomsg ifm; + } req = { + .nh.nlmsg_len = NLMSG_LENGTH(sizeof(struct ifinfomsg)), + .nh.nlmsg_type = RTM_GETLINK, + .nh.nlmsg_flags = NLM_F_DUMP | NLM_F_REQUEST, + .ifm.ifi_family = AF_PACKET, + }; if (flags & ~XDP_FLAGS_MASK || !info_size) - return -EINVAL; + return libbpf_err(-EINVAL); /* Check whether the single {HW,DRV,SKB} mode is set */ flags &= (XDP_FLAGS_SKB_MODE | XDP_FLAGS_DRV_MODE | XDP_FLAGS_HW_MODE); mask = flags - 1; if (flags && flags & mask) - return -EINVAL; - - sock = libbpf_netlink_open(&nl_pid); - if (sock < 0) - return sock; + return libbpf_err(-EINVAL); xdp_id.ifindex = ifindex; xdp_id.flags = flags; - ret = libbpf_nl_get_link(sock, nl_pid, get_xdp_info, &xdp_id); + ret = libbpf_netlink_send_recv(&req.nh, __dump_link_nlmsg, + get_xdp_info, &xdp_id); if (!ret) { size_t sz = min(info_size, sizeof(xdp_id.info)); @@ -317,8 +345,7 @@ int bpf_get_link_xdp_info(int ifindex, struct xdp_link_info *info, memset((void *) info + sz, 0, info_size - sz); } - close(sock); - return ret; + return libbpf_err(ret); } static __u32 get_xdp_id(struct xdp_link_info *info, __u32 flags) @@ -346,27 +373,413 @@ int bpf_get_link_xdp_id(int ifindex, __u32 *prog_id, __u32 flags) if (!ret) *prog_id = get_xdp_id(&info, flags); - return ret; + return libbpf_err(ret); +} + +typedef int (*qdisc_config_t)(struct nlmsghdr *nh, struct tcmsg *t, + size_t maxsz); + +static int clsact_config(struct nlmsghdr *nh, struct tcmsg *t, size_t maxsz) +{ + t->tcm_parent = TC_H_CLSACT; + t->tcm_handle = TC_H_MAKE(TC_H_CLSACT, 0); + + return nlattr_add(nh, maxsz, TCA_KIND, "clsact", sizeof("clsact")); +} + +static int attach_point_to_config(struct bpf_tc_hook *hook, + qdisc_config_t *config) +{ + switch (OPTS_GET(hook, attach_point, 0)) { + case BPF_TC_INGRESS: + case BPF_TC_EGRESS: + case BPF_TC_INGRESS | BPF_TC_EGRESS: + if (OPTS_GET(hook, parent, 0)) + return -EINVAL; + *config = &clsact_config; + return 0; + case BPF_TC_CUSTOM: + return -EOPNOTSUPP; + default: + return -EINVAL; + } } -int libbpf_nl_get_link(int sock, unsigned int nl_pid, - libbpf_dump_nlmsg_t dump_link_nlmsg, void *cookie) +static int tc_get_tcm_parent(enum bpf_tc_attach_point attach_point, + __u32 *parent) { + switch (attach_point) { + case BPF_TC_INGRESS: + case BPF_TC_EGRESS: + if (*parent) + return -EINVAL; + *parent = TC_H_MAKE(TC_H_CLSACT, + attach_point == BPF_TC_INGRESS ? + TC_H_MIN_INGRESS : TC_H_MIN_EGRESS); + break; + case BPF_TC_CUSTOM: + if (!*parent) + return -EINVAL; + break; + default: + return -EINVAL; + } + return 0; +} + +static int tc_qdisc_modify(struct bpf_tc_hook *hook, int cmd, int flags) +{ + qdisc_config_t config; + int ret; struct { - struct nlmsghdr nlh; - struct ifinfomsg ifm; - } req = { - .nlh.nlmsg_len = NLMSG_LENGTH(sizeof(struct ifinfomsg)), - .nlh.nlmsg_type = RTM_GETLINK, - .nlh.nlmsg_flags = NLM_F_DUMP | NLM_F_REQUEST, - .ifm.ifi_family = AF_PACKET, - }; - int seq = time(NULL); + struct nlmsghdr nh; + struct tcmsg tc; + char buf[256]; + } req; - req.nlh.nlmsg_seq = seq; - if (send(sock, &req, req.nlh.nlmsg_len, 0) < 0) + ret = attach_point_to_config(hook, &config); + if (ret < 0) + return ret; + + memset(&req, 0, sizeof(req)); + req.nh.nlmsg_len = NLMSG_LENGTH(sizeof(struct tcmsg)); + req.nh.nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK | flags; + req.nh.nlmsg_type = cmd; + req.tc.tcm_family = AF_UNSPEC; + req.tc.tcm_ifindex = OPTS_GET(hook, ifindex, 0); + + ret = config(&req.nh, &req.tc, sizeof(req)); + if (ret < 0) + return ret; + + return libbpf_netlink_send_recv(&req.nh, NULL, NULL, NULL); +} + +static int tc_qdisc_create_excl(struct bpf_tc_hook *hook) +{ + return tc_qdisc_modify(hook, RTM_NEWQDISC, NLM_F_CREATE | NLM_F_EXCL); +} + +static int tc_qdisc_delete(struct bpf_tc_hook *hook) +{ + return tc_qdisc_modify(hook, RTM_DELQDISC, 0); +} + +int bpf_tc_hook_create(struct bpf_tc_hook *hook) +{ + int ret; + + if (!hook || !OPTS_VALID(hook, bpf_tc_hook) || + OPTS_GET(hook, ifindex, 0) <= 0) + return libbpf_err(-EINVAL); + + ret = tc_qdisc_create_excl(hook); + return libbpf_err(ret); +} + +static int __bpf_tc_detach(const struct bpf_tc_hook *hook, + const struct bpf_tc_opts *opts, + const bool flush); + +int bpf_tc_hook_destroy(struct bpf_tc_hook *hook) +{ + if (!hook || !OPTS_VALID(hook, bpf_tc_hook) || + OPTS_GET(hook, ifindex, 0) <= 0) + return libbpf_err(-EINVAL); + + switch (OPTS_GET(hook, attach_point, 0)) { + case BPF_TC_INGRESS: + case BPF_TC_EGRESS: + return libbpf_err(__bpf_tc_detach(hook, NULL, true)); + case BPF_TC_INGRESS | BPF_TC_EGRESS: + return libbpf_err(tc_qdisc_delete(hook)); + case BPF_TC_CUSTOM: + return libbpf_err(-EOPNOTSUPP); + default: + return libbpf_err(-EINVAL); + } +} + +struct bpf_cb_ctx { + struct bpf_tc_opts *opts; + bool processed; +}; + +static int __get_tc_info(void *cookie, struct tcmsg *tc, struct nlattr **tb, + bool unicast) +{ + struct nlattr *tbb[TCA_BPF_MAX + 1]; + struct bpf_cb_ctx *info = cookie; + + if (!info || !info->opts) + return -EINVAL; + if (unicast && info->processed) + return -EINVAL; + if (!tb[TCA_OPTIONS]) + return NL_CONT; + + libbpf_nla_parse_nested(tbb, TCA_BPF_MAX, tb[TCA_OPTIONS], NULL); + if (!tbb[TCA_BPF_ID]) + return -EINVAL; + + OPTS_SET(info->opts, prog_id, libbpf_nla_getattr_u32(tbb[TCA_BPF_ID])); + OPTS_SET(info->opts, handle, tc->tcm_handle); + OPTS_SET(info->opts, priority, TC_H_MAJ(tc->tcm_info) >> 16); + + info->processed = true; + return unicast ? NL_NEXT : NL_DONE; +} + +static int get_tc_info(struct nlmsghdr *nh, libbpf_dump_nlmsg_t fn, + void *cookie) +{ + struct tcmsg *tc = NLMSG_DATA(nh); + struct nlattr *tb[TCA_MAX + 1]; + + libbpf_nla_parse(tb, TCA_MAX, + (struct nlattr *)((char *)tc + NLMSG_ALIGN(sizeof(*tc))), + NLMSG_PAYLOAD(nh, sizeof(*tc)), NULL); + if (!tb[TCA_KIND]) + return NL_CONT; + return __get_tc_info(cookie, tc, tb, nh->nlmsg_flags & NLM_F_ECHO); +} + +static int tc_add_fd_and_name(struct nlmsghdr *nh, size_t maxsz, int fd) +{ + struct bpf_prog_info info = {}; + __u32 info_len = sizeof(info); + char name[256]; + int len, ret; + + ret = bpf_obj_get_info_by_fd(fd, &info, &info_len); + if (ret < 0) + return ret; + + ret = nlattr_add(nh, maxsz, TCA_BPF_FD, &fd, sizeof(fd)); + if (ret < 0) + return ret; + len = snprintf(name, sizeof(name), "%s:[%u]", info.name, info.id); + if (len < 0) return -errno; + if (len >= sizeof(name)) + return -ENAMETOOLONG; + return nlattr_add(nh, maxsz, TCA_BPF_NAME, name, len + 1); +} + +int bpf_tc_attach(const struct bpf_tc_hook *hook, struct bpf_tc_opts *opts) +{ + __u32 protocol, bpf_flags, handle, priority, parent, prog_id, flags; + int ret, ifindex, attach_point, prog_fd; + struct bpf_cb_ctx info = {}; + struct nlattr *nla; + struct { + struct nlmsghdr nh; + struct tcmsg tc; + char buf[256]; + } req; + + if (!hook || !opts || + !OPTS_VALID(hook, bpf_tc_hook) || + !OPTS_VALID(opts, bpf_tc_opts)) + return libbpf_err(-EINVAL); + + ifindex = OPTS_GET(hook, ifindex, 0); + parent = OPTS_GET(hook, parent, 0); + attach_point = OPTS_GET(hook, attach_point, 0); + + handle = OPTS_GET(opts, handle, 0); + priority = OPTS_GET(opts, priority, 0); + prog_fd = OPTS_GET(opts, prog_fd, 0); + prog_id = OPTS_GET(opts, prog_id, 0); + flags = OPTS_GET(opts, flags, 0); + + if (ifindex <= 0 || !prog_fd || prog_id) + return libbpf_err(-EINVAL); + if (priority > UINT16_MAX) + return libbpf_err(-EINVAL); + if (flags & ~BPF_TC_F_REPLACE) + return libbpf_err(-EINVAL); + + flags = (flags & BPF_TC_F_REPLACE) ? NLM_F_REPLACE : NLM_F_EXCL; + protocol = ETH_P_ALL; + + memset(&req, 0, sizeof(req)); + req.nh.nlmsg_len = NLMSG_LENGTH(sizeof(struct tcmsg)); + req.nh.nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK | NLM_F_CREATE | + NLM_F_ECHO | flags; + req.nh.nlmsg_type = RTM_NEWTFILTER; + req.tc.tcm_family = AF_UNSPEC; + req.tc.tcm_ifindex = ifindex; + req.tc.tcm_handle = handle; + req.tc.tcm_info = TC_H_MAKE(priority << 16, htons(protocol)); + + ret = tc_get_tcm_parent(attach_point, &parent); + if (ret < 0) + return libbpf_err(ret); + req.tc.tcm_parent = parent; - return bpf_netlink_recv(sock, nl_pid, seq, __dump_link_nlmsg, - dump_link_nlmsg, cookie); + ret = nlattr_add(&req.nh, sizeof(req), TCA_KIND, "bpf", sizeof("bpf")); + if (ret < 0) + return libbpf_err(ret); + nla = nlattr_begin_nested(&req.nh, sizeof(req), TCA_OPTIONS); + if (!nla) + return libbpf_err(-EMSGSIZE); + ret = tc_add_fd_and_name(&req.nh, sizeof(req), prog_fd); + if (ret < 0) + return libbpf_err(ret); + bpf_flags = TCA_BPF_FLAG_ACT_DIRECT; + ret = nlattr_add(&req.nh, sizeof(req), TCA_BPF_FLAGS, &bpf_flags, + sizeof(bpf_flags)); + if (ret < 0) + return libbpf_err(ret); + nlattr_end_nested(&req.nh, nla); + + info.opts = opts; + + ret = libbpf_netlink_send_recv(&req.nh, get_tc_info, NULL, &info); + if (ret < 0) + return libbpf_err(ret); + if (!info.processed) + return libbpf_err(-ENOENT); + return ret; +} + +static int __bpf_tc_detach(const struct bpf_tc_hook *hook, + const struct bpf_tc_opts *opts, + const bool flush) +{ + __u32 protocol = 0, handle, priority, parent, prog_id, flags; + int ret, ifindex, attach_point, prog_fd; + struct { + struct nlmsghdr nh; + struct tcmsg tc; + char buf[256]; + } req; + + if (!hook || + !OPTS_VALID(hook, bpf_tc_hook) || + !OPTS_VALID(opts, bpf_tc_opts)) + return -EINVAL; + + ifindex = OPTS_GET(hook, ifindex, 0); + parent = OPTS_GET(hook, parent, 0); + attach_point = OPTS_GET(hook, attach_point, 0); + + handle = OPTS_GET(opts, handle, 0); + priority = OPTS_GET(opts, priority, 0); + prog_fd = OPTS_GET(opts, prog_fd, 0); + prog_id = OPTS_GET(opts, prog_id, 0); + flags = OPTS_GET(opts, flags, 0); + + if (ifindex <= 0 || flags || prog_fd || prog_id) + return -EINVAL; + if (priority > UINT16_MAX) + return -EINVAL; + if (!flush) { + if (!handle || !priority) + return -EINVAL; + protocol = ETH_P_ALL; + } else { + if (handle || priority) + return -EINVAL; + } + + memset(&req, 0, sizeof(req)); + req.nh.nlmsg_len = NLMSG_LENGTH(sizeof(struct tcmsg)); + req.nh.nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK; + req.nh.nlmsg_type = RTM_DELTFILTER; + req.tc.tcm_family = AF_UNSPEC; + req.tc.tcm_ifindex = ifindex; + if (!flush) { + req.tc.tcm_handle = handle; + req.tc.tcm_info = TC_H_MAKE(priority << 16, htons(protocol)); + } + + ret = tc_get_tcm_parent(attach_point, &parent); + if (ret < 0) + return ret; + req.tc.tcm_parent = parent; + + if (!flush) { + ret = nlattr_add(&req.nh, sizeof(req), TCA_KIND, + "bpf", sizeof("bpf")); + if (ret < 0) + return ret; + } + + return libbpf_netlink_send_recv(&req.nh, NULL, NULL, NULL); +} + +int bpf_tc_detach(const struct bpf_tc_hook *hook, + const struct bpf_tc_opts *opts) +{ + int ret; + + if (!opts) + return libbpf_err(-EINVAL); + + ret = __bpf_tc_detach(hook, opts, false); + return libbpf_err(ret); +} + +int bpf_tc_query(const struct bpf_tc_hook *hook, struct bpf_tc_opts *opts) +{ + __u32 protocol, handle, priority, parent, prog_id, flags; + int ret, ifindex, attach_point, prog_fd; + struct bpf_cb_ctx info = {}; + struct { + struct nlmsghdr nh; + struct tcmsg tc; + char buf[256]; + } req; + + if (!hook || !opts || + !OPTS_VALID(hook, bpf_tc_hook) || + !OPTS_VALID(opts, bpf_tc_opts)) + return libbpf_err(-EINVAL); + + ifindex = OPTS_GET(hook, ifindex, 0); + parent = OPTS_GET(hook, parent, 0); + attach_point = OPTS_GET(hook, attach_point, 0); + + handle = OPTS_GET(opts, handle, 0); + priority = OPTS_GET(opts, priority, 0); + prog_fd = OPTS_GET(opts, prog_fd, 0); + prog_id = OPTS_GET(opts, prog_id, 0); + flags = OPTS_GET(opts, flags, 0); + + if (ifindex <= 0 || flags || prog_fd || prog_id || + !handle || !priority) + return libbpf_err(-EINVAL); + if (priority > UINT16_MAX) + return libbpf_err(-EINVAL); + + protocol = ETH_P_ALL; + + memset(&req, 0, sizeof(req)); + req.nh.nlmsg_len = NLMSG_LENGTH(sizeof(struct tcmsg)); + req.nh.nlmsg_flags = NLM_F_REQUEST; + req.nh.nlmsg_type = RTM_GETTFILTER; + req.tc.tcm_family = AF_UNSPEC; + req.tc.tcm_ifindex = ifindex; + req.tc.tcm_handle = handle; + req.tc.tcm_info = TC_H_MAKE(priority << 16, htons(protocol)); + + ret = tc_get_tcm_parent(attach_point, &parent); + if (ret < 0) + return libbpf_err(ret); + req.tc.tcm_parent = parent; + + ret = nlattr_add(&req.nh, sizeof(req), TCA_KIND, "bpf", sizeof("bpf")); + if (ret < 0) + return libbpf_err(ret); + + info.opts = opts; + + ret = libbpf_netlink_send_recv(&req.nh, get_tc_info, NULL, &info); + if (ret < 0) + return libbpf_err(ret); + if (!info.processed) + return libbpf_err(-ENOENT); + return ret; } diff --git a/tools/lib/bpf/nlattr.h b/tools/lib/bpf/nlattr.h index 6cc3ac91690f..3c780ab6d022 100644 --- a/tools/lib/bpf/nlattr.h +++ b/tools/lib/bpf/nlattr.h @@ -10,7 +10,10 @@ #define __LIBBPF_NLATTR_H #include <stdint.h> +#include <string.h> +#include <errno.h> #include <linux/netlink.h> + /* avoid multiple definition of netlink features */ #define __LINUX_NETLINK_H @@ -103,4 +106,49 @@ int libbpf_nla_parse_nested(struct nlattr *tb[], int maxtype, int libbpf_nla_dump_errormsg(struct nlmsghdr *nlh); +static inline struct nlattr *nla_data(struct nlattr *nla) +{ + return (struct nlattr *)((char *)nla + NLA_HDRLEN); +} + +static inline struct nlattr *nh_tail(struct nlmsghdr *nh) +{ + return (struct nlattr *)((char *)nh + NLMSG_ALIGN(nh->nlmsg_len)); +} + +static inline int nlattr_add(struct nlmsghdr *nh, size_t maxsz, int type, + const void *data, int len) +{ + struct nlattr *nla; + + if (NLMSG_ALIGN(nh->nlmsg_len) + NLA_ALIGN(NLA_HDRLEN + len) > maxsz) + return -EMSGSIZE; + if (!!data != !!len) + return -EINVAL; + + nla = nh_tail(nh); + nla->nla_type = type; + nla->nla_len = NLA_HDRLEN + len; + if (data) + memcpy(nla_data(nla), data, len); + nh->nlmsg_len = NLMSG_ALIGN(nh->nlmsg_len) + NLA_ALIGN(nla->nla_len); + return 0; +} + +static inline struct nlattr *nlattr_begin_nested(struct nlmsghdr *nh, + size_t maxsz, int type) +{ + struct nlattr *tail; + + tail = nh_tail(nh); + if (nlattr_add(nh, maxsz, type | NLA_F_NESTED, NULL, 0)) + return NULL; + return tail; +} + +static inline void nlattr_end_nested(struct nlmsghdr *nh, struct nlattr *tail) +{ + tail->nla_len = (char *)nh_tail(nh) - (char *)tail; +} + #endif /* __LIBBPF_NLATTR_H */ diff --git a/tools/lib/bpf/ringbuf.c b/tools/lib/bpf/ringbuf.c index 1d80ad4e0de8..8bc117bcc7bc 100644 --- a/tools/lib/bpf/ringbuf.c +++ b/tools/lib/bpf/ringbuf.c @@ -69,23 +69,23 @@ int ring_buffer__add(struct ring_buffer *rb, int map_fd, err = -errno; pr_warn("ringbuf: failed to get map info for fd=%d: %d\n", map_fd, err); - return err; + return libbpf_err(err); } if (info.type != BPF_MAP_TYPE_RINGBUF) { pr_warn("ringbuf: map fd=%d is not BPF_MAP_TYPE_RINGBUF\n", map_fd); - return -EINVAL; + return libbpf_err(-EINVAL); } tmp = libbpf_reallocarray(rb->rings, rb->ring_cnt + 1, sizeof(*rb->rings)); if (!tmp) - return -ENOMEM; + return libbpf_err(-ENOMEM); rb->rings = tmp; tmp = libbpf_reallocarray(rb->events, rb->ring_cnt + 1, sizeof(*rb->events)); if (!tmp) - return -ENOMEM; + return libbpf_err(-ENOMEM); rb->events = tmp; r = &rb->rings[rb->ring_cnt]; @@ -103,7 +103,7 @@ int ring_buffer__add(struct ring_buffer *rb, int map_fd, err = -errno; pr_warn("ringbuf: failed to mmap consumer page for map fd=%d: %d\n", map_fd, err); - return err; + return libbpf_err(err); } r->consumer_pos = tmp; @@ -118,7 +118,7 @@ int ring_buffer__add(struct ring_buffer *rb, int map_fd, ringbuf_unmap_ring(rb, r); pr_warn("ringbuf: failed to mmap data pages for map fd=%d: %d\n", map_fd, err); - return err; + return libbpf_err(err); } r->producer_pos = tmp; r->data = tmp + rb->page_size; @@ -133,7 +133,7 @@ int ring_buffer__add(struct ring_buffer *rb, int map_fd, ringbuf_unmap_ring(rb, r); pr_warn("ringbuf: failed to epoll add map fd=%d: %d\n", map_fd, err); - return err; + return libbpf_err(err); } rb->ring_cnt++; @@ -165,11 +165,11 @@ ring_buffer__new(int map_fd, ring_buffer_sample_fn sample_cb, void *ctx, int err; if (!OPTS_VALID(opts, ring_buffer_opts)) - return NULL; + return errno = EINVAL, NULL; rb = calloc(1, sizeof(*rb)); if (!rb) - return NULL; + return errno = ENOMEM, NULL; rb->page_size = getpagesize(); @@ -188,7 +188,7 @@ ring_buffer__new(int map_fd, ring_buffer_sample_fn sample_cb, void *ctx, err_out: ring_buffer__free(rb); - return NULL; + return errno = -err, NULL; } static inline int roundup_len(__u32 len) @@ -260,7 +260,7 @@ int ring_buffer__consume(struct ring_buffer *rb) err = ringbuf_process_ring(ring); if (err < 0) - return err; + return libbpf_err(err); res += err; } if (res > INT_MAX) @@ -279,7 +279,7 @@ int ring_buffer__poll(struct ring_buffer *rb, int timeout_ms) cnt = epoll_wait(rb->epoll_fd, rb->events, rb->ring_cnt, timeout_ms); if (cnt < 0) - return -errno; + return libbpf_err(-errno); for (i = 0; i < cnt; i++) { __u32 ring_id = rb->events[i].data.fd; @@ -287,7 +287,7 @@ int ring_buffer__poll(struct ring_buffer *rb, int timeout_ms) err = ringbuf_process_ring(ring); if (err < 0) - return err; + return libbpf_err(err); res += err; } if (res > INT_MAX) diff --git a/tools/lib/bpf/skel_internal.h b/tools/lib/bpf/skel_internal.h new file mode 100644 index 000000000000..b22b50c1b173 --- /dev/null +++ b/tools/lib/bpf/skel_internal.h @@ -0,0 +1,123 @@ +/* SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) */ +/* Copyright (c) 2021 Facebook */ +#ifndef __SKEL_INTERNAL_H +#define __SKEL_INTERNAL_H + +#include <unistd.h> +#include <sys/syscall.h> +#include <sys/mman.h> + +/* This file is a base header for auto-generated *.lskel.h files. + * Its contents will change and may become part of auto-generation in the future. + * + * The layout of bpf_[map|prog]_desc and bpf_loader_ctx is feature dependent + * and will change from one version of libbpf to another and features + * requested during loader program generation. + */ +struct bpf_map_desc { + union { + /* input for the loader prog */ + struct { + __aligned_u64 initial_value; + __u32 max_entries; + }; + /* output of the loader prog */ + struct { + int map_fd; + }; + }; +}; +struct bpf_prog_desc { + int prog_fd; +}; + +struct bpf_loader_ctx { + size_t sz; + __u32 log_level; + __u32 log_size; + __u64 log_buf; +}; + +struct bpf_load_and_run_opts { + struct bpf_loader_ctx *ctx; + const void *data; + const void *insns; + __u32 data_sz; + __u32 insns_sz; + const char *errstr; +}; + +static inline int skel_sys_bpf(enum bpf_cmd cmd, union bpf_attr *attr, + unsigned int size) +{ + return syscall(__NR_bpf, cmd, attr, size); +} + +static inline int skel_closenz(int fd) +{ + if (fd > 0) + return close(fd); + return -EINVAL; +} + +static inline int bpf_load_and_run(struct bpf_load_and_run_opts *opts) +{ + int map_fd = -1, prog_fd = -1, key = 0, err; + union bpf_attr attr; + + map_fd = bpf_create_map_name(BPF_MAP_TYPE_ARRAY, "__loader.map", 4, + opts->data_sz, 1, 0); + if (map_fd < 0) { + opts->errstr = "failed to create loader map"; + err = -errno; + goto out; + } + + err = bpf_map_update_elem(map_fd, &key, opts->data, 0); + if (err < 0) { + opts->errstr = "failed to update loader map"; + err = -errno; + goto out; + } + + memset(&attr, 0, sizeof(attr)); + attr.prog_type = BPF_PROG_TYPE_SYSCALL; + attr.insns = (long) opts->insns; + attr.insn_cnt = opts->insns_sz / sizeof(struct bpf_insn); + attr.license = (long) "Dual BSD/GPL"; + memcpy(attr.prog_name, "__loader.prog", sizeof("__loader.prog")); + attr.fd_array = (long) &map_fd; + attr.log_level = opts->ctx->log_level; + attr.log_size = opts->ctx->log_size; + attr.log_buf = opts->ctx->log_buf; + attr.prog_flags = BPF_F_SLEEPABLE; + prog_fd = skel_sys_bpf(BPF_PROG_LOAD, &attr, sizeof(attr)); + if (prog_fd < 0) { + opts->errstr = "failed to load loader prog"; + err = -errno; + goto out; + } + + memset(&attr, 0, sizeof(attr)); + attr.test.prog_fd = prog_fd; + attr.test.ctx_in = (long) opts->ctx; + attr.test.ctx_size_in = opts->ctx->sz; + err = skel_sys_bpf(BPF_PROG_RUN, &attr, sizeof(attr)); + if (err < 0 || (int)attr.test.retval < 0) { + opts->errstr = "failed to execute loader prog"; + if (err < 0) + err = -errno; + else + err = (int)attr.test.retval; + goto out; + } + err = 0; +out: + if (map_fd >= 0) + close(map_fd); + if (prog_fd >= 0) + close(prog_fd); + return err; +} + +#endif diff --git a/tools/lib/bpf/xsk.c b/tools/lib/bpf/xsk.c index 6061431ee04c..e9b619aa0cdf 100644 --- a/tools/lib/bpf/xsk.c +++ b/tools/lib/bpf/xsk.c @@ -1094,7 +1094,7 @@ int xsk_socket__create_shared(struct xsk_socket **xsk_ptr, goto out_put_ctx; } if (xsk->fd == umem->fd) - umem->rx_ring_setup_done = true; + umem->tx_ring_setup_done = true; } err = xsk_get_mmap_offsets(xsk->fd, &off); diff --git a/tools/objtool/arch/x86/decode.c b/tools/objtool/arch/x86/decode.c index cedf3ede7545..523aa4157f80 100644 --- a/tools/objtool/arch/x86/decode.c +++ b/tools/objtool/arch/x86/decode.c @@ -19,6 +19,7 @@ #include <objtool/elf.h> #include <objtool/arch.h> #include <objtool/warn.h> +#include <objtool/endianness.h> #include <arch/elf.h> static int is_x86_64(const struct elf *elf) @@ -725,7 +726,7 @@ static int elf_add_alternative(struct elf *elf, return -1; } - alt->cpuid = cpuid; + alt->cpuid = bswap_if_needed(cpuid); alt->instrlen = orig_len; alt->replacementlen = repl_len; @@ -746,6 +747,10 @@ int arch_rewrite_retpolines(struct objtool_file *file) list_for_each_entry(insn, &file->retpoline_call_list, call_node) { + if (insn->type != INSN_JUMP_DYNAMIC && + insn->type != INSN_CALL_DYNAMIC) + continue; + if (!strcmp(insn->sec->name, ".text.__x86.indirect_thunk")) continue; diff --git a/tools/objtool/elf.c b/tools/objtool/elf.c index d08f5f3670f8..41bca1d13d8e 100644 --- a/tools/objtool/elf.c +++ b/tools/objtool/elf.c @@ -717,7 +717,7 @@ static int elf_add_string(struct elf *elf, struct section *strtab, char *str) struct symbol *elf_create_undef_symbol(struct elf *elf, const char *name) { - struct section *symtab; + struct section *symtab, *symtab_shndx; struct symbol *sym; Elf_Data *data; Elf_Scn *s; @@ -762,12 +762,36 @@ struct symbol *elf_create_undef_symbol(struct elf *elf, const char *name) data->d_buf = &sym->sym; data->d_size = sizeof(sym->sym); data->d_align = 1; + data->d_type = ELF_T_SYM; sym->idx = symtab->len / sizeof(sym->sym); symtab->len += data->d_size; symtab->changed = true; + symtab_shndx = find_section_by_name(elf, ".symtab_shndx"); + if (symtab_shndx) { + s = elf_getscn(elf->elf, symtab_shndx->idx); + if (!s) { + WARN_ELF("elf_getscn"); + return NULL; + } + + data = elf_newdata(s); + if (!data) { + WARN_ELF("elf_newdata"); + return NULL; + } + + data->d_buf = &sym->sym.st_size; /* conveniently 0 */ + data->d_size = sizeof(Elf32_Word); + data->d_align = 4; + data->d_type = ELF_T_WORD; + + symtab_shndx->len += 4; + symtab_shndx->changed = true; + } + sym->sec = find_section_by_index(elf, 0); elf_add_symbol(elf, sym); diff --git a/tools/perf/Documentation/perf-intel-pt.txt b/tools/perf/Documentation/perf-intel-pt.txt index 1dcec73c910c..bcf3eca5afbe 100644 --- a/tools/perf/Documentation/perf-intel-pt.txt +++ b/tools/perf/Documentation/perf-intel-pt.txt @@ -108,9 +108,9 @@ displayed as follows: perf script --itrace=ibxwpe -F+flags -The flags are "bcrosyiABEx" which stand for branch, call, return, conditional, -system, asynchronous, interrupt, transaction abort, trace begin, trace end, and -in transaction, respectively. +The flags are "bcrosyiABExgh" which stand for branch, call, return, conditional, +system, asynchronous, interrupt, transaction abort, trace begin, trace end, +in transaction, VM-entry, and VM-exit respectively. perf script also supports higher level ways to dump instruction traces: diff --git a/tools/perf/Documentation/perf-script.txt b/tools/perf/Documentation/perf-script.txt index 5b8b61075039..48a5f5b26dd4 100644 --- a/tools/perf/Documentation/perf-script.txt +++ b/tools/perf/Documentation/perf-script.txt @@ -183,14 +183,15 @@ OPTIONS At this point usage is displayed, and perf-script exits. The flags field is synthesized and may have a value when Instruction - Trace decoding. The flags are "bcrosyiABEx" which stand for branch, + Trace decoding. The flags are "bcrosyiABExgh" which stand for branch, call, return, conditional, system, asynchronous, interrupt, - transaction abort, trace begin, trace end, and in transaction, + transaction abort, trace begin, trace end, in transaction, VM-Entry, and VM-Exit respectively. Known combinations of flags are printed more nicely e.g. "call" for "bc", "return" for "br", "jcc" for "bo", "jmp" for "b", "int" for "bci", "iret" for "bri", "syscall" for "bcs", "sysret" for "brs", "async" for "by", "hw int" for "bcyi", "tx abrt" for "bA", "tr strt" for "bB", - "tr end" for "bE". However the "x" flag will be display separately in those + "tr end" for "bE", "vmentry" for "bcg", "vmexit" for "bch". + However the "x" flag will be displayed separately in those cases e.g. "jcc (x)" for a condition branch within a transaction. The callindent field is synthesized and may have a value when diff --git a/tools/perf/Makefile.config b/tools/perf/Makefile.config index 0d6619064a83..73df23dd664c 100644 --- a/tools/perf/Makefile.config +++ b/tools/perf/Makefile.config @@ -90,7 +90,6 @@ endif ifeq ($(ARCH),mips) NO_PERF_REGS := 0 CFLAGS += -I$(OUTPUT)arch/mips/include/generated - CFLAGS += -I../../arch/mips/include/uapi -I../../arch/mips/include/generated/uapi LIBUNWIND_LIBS = -lunwind -lunwind-mips endif @@ -540,6 +539,7 @@ ifndef NO_LIBELF ifdef LIBBPF_DYNAMIC ifeq ($(feature-libbpf), 1) EXTLIBS += -lbpf + $(call detected,CONFIG_LIBBPF_DYNAMIC) else dummy := $(error Error: No libbpf devel library found, please install libbpf-devel); endif diff --git a/tools/perf/arch/arm64/util/kvm-stat.c b/tools/perf/arch/arm64/util/kvm-stat.c index 2303256b7d05..73d18e0ed6f6 100644 --- a/tools/perf/arch/arm64/util/kvm-stat.c +++ b/tools/perf/arch/arm64/util/kvm-stat.c @@ -71,7 +71,7 @@ struct kvm_reg_events_ops kvm_reg_events_ops[] = { .name = "vmexit", .ops = &exit_events, }, - { NULL }, + { NULL, NULL }, }; const char * const kvm_skip_events[] = { diff --git a/tools/perf/arch/mips/entry/syscalls/syscall_n64.tbl b/tools/perf/arch/mips/entry/syscalls/syscall_n64.tbl index 91649690b52f..9cd1c34f31b5 100644 --- a/tools/perf/arch/mips/entry/syscalls/syscall_n64.tbl +++ b/tools/perf/arch/mips/entry/syscalls/syscall_n64.tbl @@ -356,3 +356,8 @@ 439 n64 faccessat2 sys_faccessat2 440 n64 process_madvise sys_process_madvise 441 n64 epoll_pwait2 sys_epoll_pwait2 +442 n64 mount_setattr sys_mount_setattr +# 443 reserved for quotactl_path +444 n64 landlock_create_ruleset sys_landlock_create_ruleset +445 n64 landlock_add_rule sys_landlock_add_rule +446 n64 landlock_restrict_self sys_landlock_restrict_self diff --git a/tools/perf/arch/powerpc/entry/syscalls/syscall.tbl b/tools/perf/arch/powerpc/entry/syscalls/syscall.tbl index 0b2480cf3e47..8f052ff4058c 100644 --- a/tools/perf/arch/powerpc/entry/syscalls/syscall.tbl +++ b/tools/perf/arch/powerpc/entry/syscalls/syscall.tbl @@ -522,3 +522,7 @@ 440 common process_madvise sys_process_madvise 441 common epoll_pwait2 sys_epoll_pwait2 compat_sys_epoll_pwait2 442 common mount_setattr sys_mount_setattr +# 443 reserved for quotactl_path +444 common landlock_create_ruleset sys_landlock_create_ruleset +445 common landlock_add_rule sys_landlock_add_rule +446 common landlock_restrict_self sys_landlock_restrict_self diff --git a/tools/perf/arch/s390/entry/syscalls/syscall.tbl b/tools/perf/arch/s390/entry/syscalls/syscall.tbl index 3abef2144dac..0690263df1dd 100644 --- a/tools/perf/arch/s390/entry/syscalls/syscall.tbl +++ b/tools/perf/arch/s390/entry/syscalls/syscall.tbl @@ -445,3 +445,7 @@ 440 common process_madvise sys_process_madvise sys_process_madvise 441 common epoll_pwait2 sys_epoll_pwait2 compat_sys_epoll_pwait2 442 common mount_setattr sys_mount_setattr sys_mount_setattr +# 443 reserved for quotactl_path +444 common landlock_create_ruleset sys_landlock_create_ruleset sys_landlock_create_ruleset +445 common landlock_add_rule sys_landlock_add_rule sys_landlock_add_rule +446 common landlock_restrict_self sys_landlock_restrict_self sys_landlock_restrict_self diff --git a/tools/perf/arch/x86/entry/syscalls/syscall_64.tbl b/tools/perf/arch/x86/entry/syscalls/syscall_64.tbl index 7bf01cbe582f..ce18119ea0d0 100644 --- a/tools/perf/arch/x86/entry/syscalls/syscall_64.tbl +++ b/tools/perf/arch/x86/entry/syscalls/syscall_64.tbl @@ -364,6 +364,10 @@ 440 common process_madvise sys_process_madvise 441 common epoll_pwait2 sys_epoll_pwait2 442 common mount_setattr sys_mount_setattr +# 443 reserved for quotactl_path +444 common landlock_create_ruleset sys_landlock_create_ruleset +445 common landlock_add_rule sys_landlock_add_rule +446 common landlock_restrict_self sys_landlock_restrict_self # # Due to a historical design error, certain syscalls are numbered differently diff --git a/tools/perf/builtin-buildid-list.c b/tools/perf/builtin-buildid-list.c index 87f5b1a4a7fa..833405c27dae 100644 --- a/tools/perf/builtin-buildid-list.c +++ b/tools/perf/builtin-buildid-list.c @@ -80,6 +80,9 @@ static int perf_session__list_build_ids(bool force, bool with_hits) if (!perf_header__has_feat(&session->header, HEADER_BUILD_ID)) with_hits = true; + if (zstd_init(&(session->zstd_data), 0) < 0) + pr_warning("Decompression initialization failed. Reported data may be incomplete.\n"); + /* * in pipe-mode, the only way to get the buildids is to parse * the record stream. Buildids are stored as RECORD_HEADER_BUILD_ID diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c index 3337b5f93336..84803abeb942 100644 --- a/tools/perf/builtin-record.c +++ b/tools/perf/builtin-record.c @@ -2714,6 +2714,12 @@ int cmd_record(int argc, const char **argv) rec->no_buildid = true; } + if (rec->opts.record_cgroup && !perf_can_record_cgroup()) { + pr_err("Kernel has no cgroup sampling support.\n"); + err = -EINVAL; + goto out_opts; + } + if (rec->opts.kcore) rec->data.is_dir = true; diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c index 5a830ae09418..f9f74a514315 100644 --- a/tools/perf/builtin-stat.c +++ b/tools/perf/builtin-stat.c @@ -572,7 +572,8 @@ static int enable_counters(void) * - we have initial delay configured */ if (!target__none(&target) || stat_config.initial_delay) { - evlist__enable(evsel_list); + if (!all_counters_use_bpf) + evlist__enable(evsel_list); if (stat_config.initial_delay > 0) pr_info(EVLIST_ENABLED_MSG); } @@ -581,13 +582,19 @@ static int enable_counters(void) static void disable_counters(void) { + struct evsel *counter; + /* * If we don't have tracee (attaching to task or cpu), counters may * still be running. To get accurate group ratios, we must stop groups * from counting before reading their constituent counters. */ - if (!target__none(&target)) - evlist__disable(evsel_list); + if (!target__none(&target)) { + evlist__for_each_entry(evsel_list, counter) + bpf_counter__disable(counter); + if (!all_counters_use_bpf) + evlist__disable(evsel_list); + } } static volatile int workload_exec_errno; diff --git a/tools/perf/check-headers.sh b/tools/perf/check-headers.sh index dd8ff287e930..c783558332b8 100755 --- a/tools/perf/check-headers.sh +++ b/tools/perf/check-headers.sh @@ -39,6 +39,7 @@ arch/x86/lib/x86-opcode-map.txt arch/x86/tools/gen-insn-attr-x86.awk arch/arm/include/uapi/asm/perf_regs.h arch/arm64/include/uapi/asm/perf_regs.h +arch/mips/include/uapi/asm/perf_regs.h arch/powerpc/include/uapi/asm/perf_regs.h arch/s390/include/uapi/asm/perf_regs.h arch/x86/include/uapi/asm/perf_regs.h diff --git a/tools/perf/perf.c b/tools/perf/perf.c index 20cb91ef06ff..2f6b67189b42 100644 --- a/tools/perf/perf.c +++ b/tools/perf/perf.c @@ -443,6 +443,8 @@ int main(int argc, const char **argv) const char *cmd; char sbuf[STRERR_BUFSIZE]; + perf_debug_setup(); + /* libsubcmd init */ exec_cmd_init("perf", PREFIX, PERF_EXEC_PATH, EXEC_PATH_ENVIRONMENT); pager_init(PERF_PAGER_ENVIRONMENT); @@ -531,8 +533,6 @@ int main(int argc, const char **argv) */ pthread__block_sigwinch(); - perf_debug_setup(); - while (1) { static int done_help; diff --git a/tools/perf/pmu-events/arch/powerpc/power10/cache.json b/tools/perf/pmu-events/arch/powerpc/power10/cache.json index 616f29098c71..605be14f441c 100644 --- a/tools/perf/pmu-events/arch/powerpc/power10/cache.json +++ b/tools/perf/pmu-events/arch/powerpc/power10/cache.json @@ -1,46 +1,56 @@ [ { - "EventCode": "1003C", + "EventCode": "0x1003C", "EventName": "PM_EXEC_STALL_DMISS_L2L3", "BriefDescription": "Cycles in which the oldest instruction in the pipeline was waiting for a load miss to resolve from either the local L2 or local L3." }, { - "EventCode": "34056", + "EventCode": "0x1E054", + "EventName": "PM_EXEC_STALL_DMISS_L21_L31", + "BriefDescription": "Cycles in which the oldest instruction in the pipeline was waiting for a load miss to resolve from another core's L2 or L3 on the same chip." + }, + { + "EventCode": "0x34054", + "EventName": "PM_EXEC_STALL_DMISS_L2L3_NOCONFLICT", + "BriefDescription": "Cycles in which the oldest instruction in the pipeline was waiting for a load miss to resolve from the local L2 or local L3, without a dispatch conflict." + }, + { + "EventCode": "0x34056", "EventName": "PM_EXEC_STALL_LOAD_FINISH", - "BriefDescription": "Cycles in which the oldest instruction in the pipeline was finishing a load after its data was reloaded from a data source beyond the local L1; cycles in which the LSU was processing an L1-hit; cycles in which the NTF instruction merged with another load in the LMQ." + "BriefDescription": "Cycles in which the oldest instruction in the pipeline was finishing a load after its data was reloaded from a data source beyond the local L1; cycles in which the LSU was processing an L1-hit; cycles in which the NTF instruction merged with another load in the LMQ; cycles in which the NTF instruction is waiting for a data reload for a load miss, but the data comes back with a non-NTF instruction." }, { - "EventCode": "3006C", + "EventCode": "0x3006C", "EventName": "PM_RUN_CYC_SMT2_MODE", "BriefDescription": "Cycles when this thread's run latch is set and the core is in SMT2 mode." }, { - "EventCode": "300F4", + "EventCode": "0x300F4", "EventName": "PM_RUN_INST_CMPL_CONC", "BriefDescription": "PowerPC instructions completed by this thread when all threads in the core had the run-latch set." }, { - "EventCode": "4C016", + "EventCode": "0x4C016", "EventName": "PM_EXEC_STALL_DMISS_L2L3_CONFLICT", "BriefDescription": "Cycles in which the oldest instruction in the pipeline was waiting for a load miss to resolve from the local L2 or local L3, with a dispatch conflict." }, { - "EventCode": "4D014", + "EventCode": "0x4D014", "EventName": "PM_EXEC_STALL_LOAD", "BriefDescription": "Cycles in which the oldest instruction in the pipeline was a load instruction executing in the Load Store Unit." }, { - "EventCode": "4D016", + "EventCode": "0x4D016", "EventName": "PM_EXEC_STALL_PTESYNC", "BriefDescription": "Cycles in which the oldest instruction in the pipeline was a PTESYNC instruction executing in the Load Store Unit." }, { - "EventCode": "401EA", + "EventCode": "0x401EA", "EventName": "PM_THRESH_EXC_128", "BriefDescription": "Threshold counter exceeded a value of 128." }, { - "EventCode": "400F6", + "EventCode": "0x400F6", "EventName": "PM_BR_MPRED_CMPL", "BriefDescription": "A mispredicted branch completed. Includes direction and target." } diff --git a/tools/perf/pmu-events/arch/powerpc/power10/floating_point.json b/tools/perf/pmu-events/arch/powerpc/power10/floating_point.json index 703cd431ae5b..54acb55e2c8c 100644 --- a/tools/perf/pmu-events/arch/powerpc/power10/floating_point.json +++ b/tools/perf/pmu-events/arch/powerpc/power10/floating_point.json @@ -1,6 +1,6 @@ [ { - "EventCode": "4016E", + "EventCode": "0x4016E", "EventName": "PM_THRESH_NOT_MET", "BriefDescription": "Threshold counter did not meet threshold." } diff --git a/tools/perf/pmu-events/arch/powerpc/power10/frontend.json b/tools/perf/pmu-events/arch/powerpc/power10/frontend.json index eac8609dcc90..558f9530f54e 100644 --- a/tools/perf/pmu-events/arch/powerpc/power10/frontend.json +++ b/tools/perf/pmu-events/arch/powerpc/power10/frontend.json @@ -1,216 +1,246 @@ [ { - "EventCode": "10004", + "EventCode": "0x10004", "EventName": "PM_EXEC_STALL_TRANSLATION", "BriefDescription": "Cycles in which the oldest instruction in the pipeline suffered a TLB miss or ERAT miss and waited for it to resolve." }, { - "EventCode": "10010", + "EventCode": "0x10006", + "EventName": "PM_DISP_STALL_HELD_OTHER_CYC", + "BriefDescription": "Cycles in which the NTC instruction is held at dispatch for any other reason." + }, + { + "EventCode": "0x10010", "EventName": "PM_PMC4_OVERFLOW", "BriefDescription": "The event selected for PMC4 caused the event counter to overflow." }, { - "EventCode": "10020", + "EventCode": "0x10020", "EventName": "PM_PMC4_REWIND", "BriefDescription": "The speculative event selected for PMC4 rewinds and the counter for PMC4 is not charged." }, { - "EventCode": "10038", + "EventCode": "0x10038", "EventName": "PM_DISP_STALL_TRANSLATION", "BriefDescription": "Cycles when dispatch was stalled for this thread because the MMU was handling a translation miss." }, { - "EventCode": "1003A", + "EventCode": "0x1003A", "EventName": "PM_DISP_STALL_BR_MPRED_IC_L2", "BriefDescription": "Cycles when dispatch was stalled while the instruction was fetched from the local L2 after suffering a branch mispredict." }, { - "EventCode": "1E050", + "EventCode": "0x1D05E", + "EventName": "PM_DISP_STALL_HELD_HALT_CYC", + "BriefDescription": "Cycles in which the NTC instruction is held at dispatch because of power management." + }, + { + "EventCode": "0x1E050", "EventName": "PM_DISP_STALL_HELD_STF_MAPPER_CYC", "BriefDescription": "Cycles in which the NTC instruction is held at dispatch because the STF mapper/SRB was full. Includes GPR (count, link, tar), VSR, VMR, FPR." }, { - "EventCode": "1F054", + "EventCode": "0x1F054", "EventName": "PM_DTLB_HIT", "BriefDescription": "The PTE required by the instruction was resident in the TLB (data TLB access). When MMCR1[16]=0 this event counts only demand hits. When MMCR1[16]=1 this event includes demand and prefetch. Applies to both HPT and RPT." }, { - "EventCode": "101E8", + "EventCode": "0x10064", + "EventName": "PM_DISP_STALL_IC_L2", + "BriefDescription": "Cycles when dispatch was stalled while the instruction was fetched from the local L2." + }, + { + "EventCode": "0x101E8", "EventName": "PM_THRESH_EXC_256", "BriefDescription": "Threshold counter exceeded a count of 256." }, { - "EventCode": "101EC", + "EventCode": "0x101EC", "EventName": "PM_THRESH_MET", "BriefDescription": "Threshold exceeded." }, { - "EventCode": "100F2", + "EventCode": "0x100F2", "EventName": "PM_1PLUS_PPC_CMPL", "BriefDescription": "Cycles in which at least one instruction is completed by this thread." }, { - "EventCode": "100F6", + "EventCode": "0x100F6", "EventName": "PM_IERAT_MISS", "BriefDescription": "IERAT Reloaded to satisfy an IERAT miss. All page sizes are counted by this event." }, { - "EventCode": "100F8", + "EventCode": "0x100F8", "EventName": "PM_DISP_STALL_CYC", "BriefDescription": "Cycles the ICT has no itags assigned to this thread (no instructions were dispatched during these cycles)." }, { - "EventCode": "20114", + "EventCode": "0x20006", + "EventName": "PM_DISP_STALL_HELD_ISSQ_FULL_CYC", + "BriefDescription": "Cycles in which the NTC instruction is held at dispatch due to Issue queue full. Includes issue queue and branch queue." + }, + { + "EventCode": "0x20114", "EventName": "PM_MRK_L2_RC_DISP", "BriefDescription": "Marked instruction RC dispatched in L2." }, { - "EventCode": "2C010", + "EventCode": "0x2C010", "EventName": "PM_EXEC_STALL_LSU", "BriefDescription": "Cycles in which the oldest instruction in the pipeline was executing in the Load Store Unit. This does not include simple fixed point instructions." }, { - "EventCode": "2C016", + "EventCode": "0x2C016", "EventName": "PM_DISP_STALL_IERAT_ONLY_MISS", "BriefDescription": "Cycles when dispatch was stalled while waiting to resolve an instruction ERAT miss." }, { - "EventCode": "2C01E", + "EventCode": "0x2C01E", "EventName": "PM_DISP_STALL_BR_MPRED_IC_L3", "BriefDescription": "Cycles when dispatch was stalled while the instruction was fetched from the local L3 after suffering a branch mispredict." }, { - "EventCode": "2D01A", + "EventCode": "0x2D01A", "EventName": "PM_DISP_STALL_IC_MISS", "BriefDescription": "Cycles when dispatch was stalled for this thread due to an Icache Miss." }, { - "EventCode": "2D01C", - "EventName": "PM_CMPL_STALL_STCX", - "BriefDescription": "Cycles in which the oldest instruction in the pipeline was a stcx waiting for resolution from the nest before completing." - }, - { - "EventCode": "2E018", + "EventCode": "0x2E018", "EventName": "PM_DISP_STALL_FETCH", "BriefDescription": "Cycles when dispatch was stalled for this thread because Fetch was being held." }, { - "EventCode": "2E01A", + "EventCode": "0x2E01A", "EventName": "PM_DISP_STALL_HELD_XVFC_MAPPER_CYC", "BriefDescription": "Cycles in which the NTC instruction is held at dispatch because the XVFC mapper/SRB was full." }, { - "EventCode": "2C142", + "EventCode": "0x2C142", "EventName": "PM_MRK_XFER_FROM_SRC_PMC2", "BriefDescription": "For a marked data transfer instruction, the processor's L1 data cache was reloaded from the source specified in MMCR3[15:27]. If MMCR1[16|17] is 0 (default), this count includes only lines that were reloaded to satisfy a demand miss. If MMCR1[16|17] is 1, this count includes both demand misses and prefetch reloads." }, { - "EventCode": "24050", + "EventCode": "0x24050", "EventName": "PM_IOPS_DISP", "BriefDescription": "Internal Operations dispatched. PM_IOPS_DISP / PM_INST_DISP will show the average number of internal operations per PowerPC instruction." }, { - "EventCode": "2405E", + "EventCode": "0x2405E", "EventName": "PM_ISSUE_CANCEL", "BriefDescription": "An instruction issued and the issue was later cancelled. Only one cancel per PowerPC instruction." }, { - "EventCode": "200FA", + "EventCode": "0x200FA", "EventName": "PM_BR_TAKEN_CMPL", "BriefDescription": "Branch Taken instruction completed." }, { - "EventCode": "30012", + "EventCode": "0x30004", + "EventName": "PM_DISP_STALL_FLUSH", + "BriefDescription": "Cycles when dispatch was stalled because of a flush that happened to an instruction(s) that was not yet NTC. PM_EXEC_STALL_NTC_FLUSH only includes instructions that were flushed after becoming NTC." + }, + { + "EventCode": "0x3000A", + "EventName": "PM_DISP_STALL_ITLB_MISS", + "BriefDescription": "Cycles when dispatch was stalled while waiting to resolve an instruction TLB miss." + }, + { + "EventCode": "0x30012", "EventName": "PM_FLUSH_COMPLETION", "BriefDescription": "The instruction that was next to complete (oldest in the pipeline) did not complete because it suffered a flush." }, { - "EventCode": "30014", + "EventCode": "0x30014", "EventName": "PM_EXEC_STALL_STORE", "BriefDescription": "Cycles in which the oldest instruction in the pipeline was a store instruction executing in the Load Store Unit." }, { - "EventCode": "30018", + "EventCode": "0x30018", "EventName": "PM_DISP_STALL_HELD_SCOREBOARD_CYC", "BriefDescription": "Cycles in which the NTC instruction is held at dispatch while waiting on the Scoreboard. This event combines VSCR and FPSCR together." }, { - "EventCode": "30026", + "EventCode": "0x30026", "EventName": "PM_EXEC_STALL_STORE_MISS", "BriefDescription": "Cycles in which the oldest instruction in the pipeline was a store whose cache line was not resident in the L1 and was waiting for allocation of the missing line into the L1." }, { - "EventCode": "3012A", + "EventCode": "0x3012A", "EventName": "PM_MRK_L2_RC_DONE", "BriefDescription": "L2 RC machine completed the transaction for the marked instruction." }, { - "EventCode": "3F046", + "EventCode": "0x3F046", "EventName": "PM_ITLB_HIT_1G", "BriefDescription": "Instruction TLB hit (IERAT reload) page size 1G, which implies Radix Page Table translation is in use. When MMCR1[17]=0 this event counts only for demand misses. When MMCR1[17]=1 this event includes demand misses and prefetches." }, { - "EventCode": "34058", + "EventCode": "0x34058", "EventName": "PM_DISP_STALL_BR_MPRED_ICMISS", "BriefDescription": "Cycles when dispatch was stalled after a mispredicted branch resulted in an instruction cache miss." }, { - "EventCode": "3D05C", + "EventCode": "0x3D05C", "EventName": "PM_DISP_STALL_HELD_RENAME_CYC", "BriefDescription": "Cycles in which the NTC instruction is held at dispatch because the mapper/SRB was full. Includes GPR (count, link, tar), VSR, VMR, FPR and XVFC." }, { - "EventCode": "3E052", + "EventCode": "0x3E052", "EventName": "PM_DISP_STALL_IC_L3", "BriefDescription": "Cycles when dispatch was stalled while the instruction was fetched from the local L3." }, { - "EventCode": "3E054", + "EventCode": "0x3E054", "EventName": "PM_LD_MISS_L1", "BriefDescription": "Load Missed L1, counted at execution time (can be greater than loads finished). LMQ merges are not included in this count. i.e. if a load instruction misses on an address that is already allocated on the LMQ, this event will not increment for that load). Note that this count is per slice, so if a load spans multiple slices this event will increment multiple times for a single load." }, { - "EventCode": "301EA", + "EventCode": "0x301EA", "EventName": "PM_THRESH_EXC_1024", "BriefDescription": "Threshold counter exceeded a value of 1024." }, { - "EventCode": "300FA", + "EventCode": "0x300FA", "EventName": "PM_INST_FROM_L3MISS", "BriefDescription": "The processor's instruction cache was reloaded from a source other than the local core's L1, L2, or L3 due to a demand miss." }, { - "EventCode": "40006", + "EventCode": "0x40006", "EventName": "PM_ISSUE_KILL", "BriefDescription": "Cycles in which an instruction or group of instructions were cancelled after being issued. This event increments once per occurrence, regardless of how many instructions are included in the issue group." }, { - "EventCode": "40116", + "EventCode": "0x40116", "EventName": "PM_MRK_LARX_FIN", "BriefDescription": "Marked load and reserve instruction (LARX) finished. LARX and STCX are instructions used to acquire a lock." }, { - "EventCode": "4C010", + "EventCode": "0x4C010", "EventName": "PM_DISP_STALL_BR_MPRED_IC_L3MISS", "BriefDescription": "Cycles when dispatch was stalled while the instruction was fetched from sources beyond the local L3 after suffering a mispredicted branch." }, { - "EventCode": "4D01E", + "EventCode": "0x4D01E", "EventName": "PM_DISP_STALL_BR_MPRED", "BriefDescription": "Cycles when dispatch was stalled for this thread due to a mispredicted branch." }, { - "EventCode": "4E010", + "EventCode": "0x4E010", "EventName": "PM_DISP_STALL_IC_L3MISS", "BriefDescription": "Cycles when dispatch was stalled while the instruction was fetched from any source beyond the local L3." }, { - "EventCode": "4E01A", + "EventCode": "0x4E01A", "EventName": "PM_DISP_STALL_HELD_CYC", "BriefDescription": "Cycles in which the NTC instruction is held at dispatch for any reason." }, { - "EventCode": "44056", + "EventCode": "0x4003C", + "EventName": "PM_DISP_STALL_HELD_SYNC_CYC", + "BriefDescription": "Cycles in which the NTC instruction is held at dispatch because of a synchronizing instruction that requires the ICT to be empty before dispatch." + }, + { + "EventCode": "0x44056", "EventName": "PM_VECTOR_ST_CMPL", "BriefDescription": "Vector store instructions completed." } diff --git a/tools/perf/pmu-events/arch/powerpc/power10/locks.json b/tools/perf/pmu-events/arch/powerpc/power10/locks.json index 016d8de0e14a..b5a0d6521963 100644 --- a/tools/perf/pmu-events/arch/powerpc/power10/locks.json +++ b/tools/perf/pmu-events/arch/powerpc/power10/locks.json @@ -1,11 +1,11 @@ [ { - "EventCode": "1E058", + "EventCode": "0x1E058", "EventName": "PM_STCX_FAIL_FIN", "BriefDescription": "Conditional store instruction (STCX) failed. LARX and STCX are instructions used to acquire a lock." }, { - "EventCode": "4E050", + "EventCode": "0x4E050", "EventName": "PM_STCX_PASS_FIN", "BriefDescription": "Conditional store instruction (STCX) passed. LARX and STCX are instructions used to acquire a lock." } diff --git a/tools/perf/pmu-events/arch/powerpc/power10/marked.json b/tools/perf/pmu-events/arch/powerpc/power10/marked.json index 93a5a5910648..58b5dfe3a273 100644 --- a/tools/perf/pmu-events/arch/powerpc/power10/marked.json +++ b/tools/perf/pmu-events/arch/powerpc/power10/marked.json @@ -1,146 +1,141 @@ [ { - "EventCode": "1002C", + "EventCode": "0x1002C", "EventName": "PM_LD_PREFETCH_CACHE_LINE_MISS", "BriefDescription": "The L1 cache was reloaded with a line that fulfills a prefetch request." }, { - "EventCode": "10132", + "EventCode": "0x10132", "EventName": "PM_MRK_INST_ISSUED", "BriefDescription": "Marked instruction issued. Note that stores always get issued twice, the address gets issued to the LSU and the data gets issued to the VSU. Also, issues can sometimes get killed/cancelled and cause multiple sequential issues for the same instruction." }, { - "EventCode": "101E0", + "EventCode": "0x101E0", "EventName": "PM_MRK_INST_DISP", "BriefDescription": "The thread has dispatched a randomly sampled marked instruction." }, { - "EventCode": "101E2", + "EventCode": "0x101E2", "EventName": "PM_MRK_BR_TAKEN_CMPL", "BriefDescription": "Marked Branch Taken instruction completed." }, { - "EventCode": "20112", + "EventCode": "0x20112", "EventName": "PM_MRK_NTF_FIN", "BriefDescription": "The marked instruction became the oldest in the pipeline before it finished. It excludes instructions that finish at dispatch." }, { - "EventCode": "2C01C", + "EventCode": "0x2C01C", "EventName": "PM_EXEC_STALL_DMISS_OFF_CHIP", "BriefDescription": "Cycles in which the oldest instruction in the pipeline was waiting for a load miss to resolve from a remote chip." }, { - "EventCode": "20138", + "EventCode": "0x20138", "EventName": "PM_MRK_ST_NEST", "BriefDescription": "A store has been sampled/marked and is at the point of execution where it has completed in the core and can no longer be flushed. At this point the store is sent to the L2." }, { - "EventCode": "2013A", + "EventCode": "0x2013A", "EventName": "PM_MRK_BRU_FIN", "BriefDescription": "Marked Branch instruction finished." }, { - "EventCode": "2C144", + "EventCode": "0x2C144", "EventName": "PM_MRK_XFER_FROM_SRC_CYC_PMC2", "BriefDescription": "Cycles taken for a marked demand miss to reload a line from the source specified in MMCR3[15:27]." }, { - "EventCode": "24156", + "EventCode": "0x24156", "EventName": "PM_MRK_STCX_FIN", "BriefDescription": "Marked conditional store instruction (STCX) finished. LARX and STCX are instructions used to acquire a lock." }, { - "EventCode": "24158", + "EventCode": "0x24158", "EventName": "PM_MRK_INST", "BriefDescription": "An instruction was marked. Includes both Random Instruction Sampling (RIS) at decode time and Random Event Sampling (RES) at the time the configured event happens." }, { - "EventCode": "2415C", + "EventCode": "0x2415C", "EventName": "PM_MRK_BR_CMPL", "BriefDescription": "A marked branch completed. All branches are included." }, { - "EventCode": "200FD", + "EventCode": "0x200FD", "EventName": "PM_L1_ICACHE_MISS", "BriefDescription": "Demand iCache Miss." }, { - "EventCode": "30130", + "EventCode": "0x30130", "EventName": "PM_MRK_INST_FIN", "BriefDescription": "marked instruction finished. Excludes instructions that finish at dispatch. Note that stores always finish twice since the address gets issued to the LSU and the data gets issued to the VSU." }, { - "EventCode": "34146", + "EventCode": "0x34146", "EventName": "PM_MRK_LD_CMPL", "BriefDescription": "Marked loads completed." }, { - "EventCode": "3E158", + "EventCode": "0x3E158", "EventName": "PM_MRK_STCX_FAIL", "BriefDescription": "Marked conditional store instruction (STCX) failed. LARX and STCX are instructions used to acquire a lock." }, { - "EventCode": "3E15A", + "EventCode": "0x3E15A", "EventName": "PM_MRK_ST_FIN", "BriefDescription": "The marked instruction was a store of any kind." }, { - "EventCode": "30068", + "EventCode": "0x30068", "EventName": "PM_L1_ICACHE_RELOADED_PREF", "BriefDescription": "Counts all Icache prefetch reloads ( includes demand turned into prefetch)." }, { - "EventCode": "301E4", + "EventCode": "0x301E4", "EventName": "PM_MRK_BR_MPRED_CMPL", "BriefDescription": "Marked Branch Mispredicted. Includes direction and target." }, { - "EventCode": "300F6", + "EventCode": "0x300F6", "EventName": "PM_LD_DEMAND_MISS_L1", "BriefDescription": "The L1 cache was reloaded with a line that fulfills a demand miss request. Counted at reload time, before finish." }, { - "EventCode": "300FE", + "EventCode": "0x300FE", "EventName": "PM_DATA_FROM_L3MISS", "BriefDescription": "The processor's data cache was reloaded from a source other than the local core's L1, L2, or L3 due to a demand miss." }, { - "EventCode": "40012", + "EventCode": "0x40012", "EventName": "PM_L1_ICACHE_RELOADED_ALL", "BriefDescription": "Counts all Icache reloads includes demand, prefetch, prefetch turned into demand and demand turned into prefetch." }, { - "EventCode": "40134", + "EventCode": "0x40134", "EventName": "PM_MRK_INST_TIMEO", "BriefDescription": "Marked instruction finish timeout (instruction was lost)." }, { - "EventCode": "4003C", - "EventName": "PM_DISP_STALL_HELD_SYNC_CYC", - "BriefDescription": "Cycles in which the NTC instruction is held at dispatch because of a synchronizing instruction that requires the ICT to be empty before dispatch." - }, - { - "EventCode": "4505A", + "EventCode": "0x4505A", "EventName": "PM_SP_FLOP_CMPL", "BriefDescription": "Single Precision floating point instructions completed." }, { - "EventCode": "4D058", + "EventCode": "0x4D058", "EventName": "PM_VECTOR_FLOP_CMPL", "BriefDescription": "Vector floating point instructions completed." }, { - "EventCode": "4D05A", + "EventCode": "0x4D05A", "EventName": "PM_NON_MATH_FLOP_CMPL", "BriefDescription": "Non Math instructions completed." }, { - "EventCode": "401E0", + "EventCode": "0x401E0", "EventName": "PM_MRK_INST_CMPL", "BriefDescription": "marked instruction completed." }, { - "EventCode": "400FE", + "EventCode": "0x400FE", "EventName": "PM_DATA_FROM_MEMORY", "BriefDescription": "The processor's data cache was reloaded from local, remote, or distant memory due to a demand miss." } diff --git a/tools/perf/pmu-events/arch/powerpc/power10/memory.json b/tools/perf/pmu-events/arch/powerpc/power10/memory.json index b01141eeebee..843b51f531e9 100644 --- a/tools/perf/pmu-events/arch/powerpc/power10/memory.json +++ b/tools/perf/pmu-events/arch/powerpc/power10/memory.json @@ -1,191 +1,186 @@ [ { - "EventCode": "1000A", + "EventCode": "0x1000A", "EventName": "PM_PMC3_REWIND", "BriefDescription": "The speculative event selected for PMC3 rewinds and the counter for PMC3 is not charged." }, { - "EventCode": "1C040", + "EventCode": "0x1C040", "EventName": "PM_XFER_FROM_SRC_PMC1", "BriefDescription": "The processor's L1 data cache was reloaded from the source specified in MMCR3[0:12]. If MMCR1[16|17] is 0 (default), this count includes only lines that were reloaded to satisfy a demand miss. If MMCR1[16|17] is 1, this count includes both demand misses and prefetch reloads." }, { - "EventCode": "1C142", + "EventCode": "0x1C142", "EventName": "PM_MRK_XFER_FROM_SRC_PMC1", "BriefDescription": "For a marked data transfer instruction, the processor's L1 data cache was reloaded from the source specified in MMCR3[0:12]. If MMCR1[16|17] is 0 (default), this count includes only lines that were reloaded to satisfy a demand miss. If MMCR1[16|17] is 1, this count includes both demand misses and prefetch reloads." }, { - "EventCode": "1C144", + "EventCode": "0x1C144", "EventName": "PM_MRK_XFER_FROM_SRC_CYC_PMC1", "BriefDescription": "Cycles taken for a marked demand miss to reload a line from the source specified in MMCR3[0:12]." }, { - "EventCode": "1C056", + "EventCode": "0x1C056", "EventName": "PM_DERAT_MISS_4K", "BriefDescription": "Data ERAT Miss (Data TLB Access) page size 4K. When MMCR1[16]=0 this event counts only DERAT reloads for demand misses. When MMCR1[16]=1 this event includes demand misses and prefetches." }, { - "EventCode": "1C058", + "EventCode": "0x1C058", "EventName": "PM_DTLB_MISS_16G", "BriefDescription": "Data TLB reload (after a miss) page size 16G. When MMCR1[16]=0 this event counts only for demand misses. When MMCR1[16]=1 this event includes demand misses and prefetches." }, { - "EventCode": "1C05C", + "EventCode": "0x1C05C", "EventName": "PM_DTLB_MISS_2M", "BriefDescription": "Data TLB reload (after a miss) page size 2M. Implies radix translation was used. When MMCR1[16]=0 this event counts only for demand misses. When MMCR1[16]=1 this event includes demand misses and prefetches." }, { - "EventCode": "1E056", + "EventCode": "0x1E056", "EventName": "PM_EXEC_STALL_STORE_PIPE", "BriefDescription": "Cycles in which the oldest instruction in the pipeline was executing in the store unit. This does not include cycles spent handling store misses, PTESYNC instructions or TLBIE instructions." }, { - "EventCode": "1F150", + "EventCode": "0x1F150", "EventName": "PM_MRK_ST_L2_CYC", "BriefDescription": "Cycles from L2 RC dispatch to L2 RC completion." }, { - "EventCode": "10062", + "EventCode": "0x10062", "EventName": "PM_LD_L3MISS_PEND_CYC", "BriefDescription": "Cycles L3 miss was pending for this thread." }, { - "EventCode": "20010", + "EventCode": "0x20010", "EventName": "PM_PMC1_OVERFLOW", "BriefDescription": "The event selected for PMC1 caused the event counter to overflow." }, { - "EventCode": "2001A", + "EventCode": "0x2001A", "EventName": "PM_ITLB_HIT", "BriefDescription": "The PTE required to translate the instruction address was resident in the TLB (instruction TLB access/IERAT reload). Applies to both HPT and RPT. When MMCR1[17]=0 this event counts only for demand misses. When MMCR1[17]=1 this event includes demand misses and prefetches." }, { - "EventCode": "2003E", + "EventCode": "0x2003E", "EventName": "PM_PTESYNC_FIN", "BriefDescription": "Ptesync instruction finished in the store unit. Only one ptesync can finish at a time." }, { - "EventCode": "2C040", + "EventCode": "0x2C040", "EventName": "PM_XFER_FROM_SRC_PMC2", "BriefDescription": "The processor's L1 data cache was reloaded from the source specified in MMCR3[15:27]. If MMCR1[16|17] is 0 (default), this count includes only lines that were reloaded to satisfy a demand miss. If MMCR1[16|17] is 1, this count includes both demand misses and prefetch reloads." }, { - "EventCode": "2C054", + "EventCode": "0x2C054", "EventName": "PM_DERAT_MISS_64K", "BriefDescription": "Data ERAT Miss (Data TLB Access) page size 64K. When MMCR1[16]=0 this event counts only DERAT reloads for demand misses. When MMCR1[16]=1 this event includes demand misses and prefetches." }, { - "EventCode": "2C056", + "EventCode": "0x2C056", "EventName": "PM_DTLB_MISS_4K", "BriefDescription": "Data TLB reload (after a miss) page size 4K. When MMCR1[16]=0 this event counts only for demand misses. When MMCR1[16]=1 this event includes demand misses and prefetches." }, { - "EventCode": "2D154", + "EventCode": "0x2D154", "EventName": "PM_MRK_DERAT_MISS_64K", "BriefDescription": "Data ERAT Miss (Data TLB Access) page size 64K for a marked instruction. When MMCR1[16]=0 this event counts only DERAT reloads for demand misses. When MMCR1[16]=1 this event includes demand misses and prefetches." }, { - "EventCode": "200F6", + "EventCode": "0x200F6", "EventName": "PM_DERAT_MISS", "BriefDescription": "DERAT Reloaded to satisfy a DERAT miss. All page sizes are counted by this event. When MMCR1[16]=0 this event counts only DERAT reloads for demand misses. When MMCR1[16]=1 this event includes demand misses and prefetches." }, { - "EventCode": "3000A", - "EventName": "PM_DISP_STALL_ITLB_MISS", - "BriefDescription": "Cycles when dispatch was stalled while waiting to resolve an instruction TLB miss." - }, - { - "EventCode": "30016", + "EventCode": "0x30016", "EventName": "PM_EXEC_STALL_DERAT_DTLB_MISS", "BriefDescription": "Cycles in which the oldest instruction in the pipeline suffered a TLB miss and waited for it resolve." }, { - "EventCode": "3C040", + "EventCode": "0x3C040", "EventName": "PM_XFER_FROM_SRC_PMC3", "BriefDescription": "The processor's L1 data cache was reloaded from the source specified in MMCR3[30:42]. If MMCR1[16|17] is 0 (default), this count includes only lines that were reloaded to satisfy a demand miss. If MMCR1[16|17] is 1, this count includes both demand misses and prefetch reloads." }, { - "EventCode": "3C142", + "EventCode": "0x3C142", "EventName": "PM_MRK_XFER_FROM_SRC_PMC3", "BriefDescription": "For a marked data transfer instruction, the processor's L1 data cache was reloaded from the source specified in MMCR3[30:42]. If MMCR1[16|17] is 0 (default), this count includes only lines that were reloaded to satisfy a demand miss. If MMCR1[16|17] is 1, this count includes both demand misses and prefetch reloads." }, { - "EventCode": "3C144", + "EventCode": "0x3C144", "EventName": "PM_MRK_XFER_FROM_SRC_CYC_PMC3", "BriefDescription": "Cycles taken for a marked demand miss to reload a line from the source specified in MMCR3[30:42]." }, { - "EventCode": "3C054", + "EventCode": "0x3C054", "EventName": "PM_DERAT_MISS_16M", "BriefDescription": "Data ERAT Miss (Data TLB Access) page size 16M. When MMCR1[16]=0 this event counts only DERAT reloads for demand misses. When MMCR1[16]=1 this event includes demand misses and prefetches." }, { - "EventCode": "3C056", + "EventCode": "0x3C056", "EventName": "PM_DTLB_MISS_64K", "BriefDescription": "Data TLB reload (after a miss) page size 64K. When MMCR1[16]=0 this event counts only for demand misses. When MMCR1[16]=1 this event includes demand misses and prefetches." }, { - "EventCode": "3C058", + "EventCode": "0x3C058", "EventName": "PM_LARX_FIN", "BriefDescription": "Load and reserve instruction (LARX) finished. LARX and STCX are instructions used to acquire a lock." }, { - "EventCode": "301E2", + "EventCode": "0x301E2", "EventName": "PM_MRK_ST_CMPL", "BriefDescription": "Marked store completed and sent to nest. Note that this count excludes cache-inhibited stores." }, { - "EventCode": "300FC", + "EventCode": "0x300FC", "EventName": "PM_DTLB_MISS", "BriefDescription": "The DPTEG required for the load/store instruction in execution was missing from the TLB. It includes pages of all sizes for demand and prefetch activity." }, { - "EventCode": "4D02C", + "EventCode": "0x4D02C", "EventName": "PM_PMC1_REWIND", "BriefDescription": "The speculative event selected for PMC1 rewinds and the counter for PMC1 is not charged." }, { - "EventCode": "4003E", + "EventCode": "0x4003E", "EventName": "PM_LD_CMPL", "BriefDescription": "Loads completed." }, { - "EventCode": "4C040", + "EventCode": "0x4C040", "EventName": "PM_XFER_FROM_SRC_PMC4", "BriefDescription": "The processor's L1 data cache was reloaded from the source specified in MMCR3[45:57]. If MMCR1[16|17] is 0 (default), this count includes only lines that were reloaded to satisfy a demand miss. If MMCR1[16|17] is 1, this count includes both demand misses and prefetch reloads." }, { - "EventCode": "4C142", + "EventCode": "0x4C142", "EventName": "PM_MRK_XFER_FROM_SRC_PMC4", "BriefDescription": "For a marked data transfer instruction, the processor's L1 data cache was reloaded from the source specified in MMCR3[45:57]. If MMCR1[16|17] is 0 (default), this count includes only lines that were reloaded to satisfy a demand miss. If MMCR1[16|17] is 1, this count includes both demand misses and prefetch reloads." }, { - "EventCode": "4C144", + "EventCode": "0x4C144", "EventName": "PM_MRK_XFER_FROM_SRC_CYC_PMC4", "BriefDescription": "Cycles taken for a marked demand miss to reload a line from the source specified in MMCR3[45:57]." }, { - "EventCode": "4C056", + "EventCode": "0x4C056", "EventName": "PM_DTLB_MISS_16M", "BriefDescription": "Data TLB reload (after a miss) page size 16M. When MMCR1[16]=0 this event counts only for demand misses. When MMCR1[16]=1 this event includes demand misses and prefetches." }, { - "EventCode": "4C05A", + "EventCode": "0x4C05A", "EventName": "PM_DTLB_MISS_1G", "BriefDescription": "Data TLB reload (after a miss) page size 1G. Implies radix translation was used. When MMCR1[16]=0 this event counts only for demand misses. When MMCR1[16]=1 this event includes demand misses and prefetches." }, { - "EventCode": "4C15E", + "EventCode": "0x4C15E", "EventName": "PM_MRK_DTLB_MISS_64K", "BriefDescription": "Marked Data TLB reload (after a miss) page size 64K. When MMCR1[16]=0 this event counts only for demand misses. When MMCR1[16]=1 this event includes demand misses and prefetches." }, { - "EventCode": "4D056", + "EventCode": "0x4D056", "EventName": "PM_NON_FMA_FLOP_CMPL", "BriefDescription": "Non FMA instruction completed." }, { - "EventCode": "40164", + "EventCode": "0x40164", "EventName": "PM_MRK_DERAT_MISS_2M", "BriefDescription": "Data ERAT Miss (Data TLB Access) page size 2M for a marked instruction. When MMCR1[16]=0 this event counts only DERAT reloads for demand misses. When MMCR1[16]=1 this event includes demand misses and prefetches." } diff --git a/tools/perf/pmu-events/arch/powerpc/power10/others.json b/tools/perf/pmu-events/arch/powerpc/power10/others.json index a119e56cbf1c..7d0de1a2860b 100644 --- a/tools/perf/pmu-events/arch/powerpc/power10/others.json +++ b/tools/perf/pmu-events/arch/powerpc/power10/others.json @@ -1,296 +1,271 @@ [ { - "EventCode": "10016", + "EventCode": "0x10016", "EventName": "PM_VSU0_ISSUE", "BriefDescription": "VSU instructions issued to VSU pipe 0." }, { - "EventCode": "1001C", + "EventCode": "0x1001C", "EventName": "PM_ULTRAVISOR_INST_CMPL", "BriefDescription": "PowerPC instructions that completed while the thread was in ultravisor state." }, { - "EventCode": "100F0", + "EventCode": "0x100F0", "EventName": "PM_CYC", "BriefDescription": "Processor cycles." }, { - "EventCode": "10134", + "EventCode": "0x10134", "EventName": "PM_MRK_ST_DONE_L2", "BriefDescription": "Marked stores completed in L2 (RC machine done)." }, { - "EventCode": "1505E", + "EventCode": "0x1505E", "EventName": "PM_LD_HIT_L1", "BriefDescription": "Loads that finished without experiencing an L1 miss." }, { - "EventCode": "1D05E", - "EventName": "PM_DISP_STALL_HELD_HALT_CYC", - "BriefDescription": "Cycles in which the NTC instruction is held at dispatch because of power management." - }, - { - "EventCode": "1E054", - "EventName": "PM_EXEC_STALL_DMISS_L21_L31", - "BriefDescription": "Cycles in which the oldest instruction in the pipeline was waiting for a load miss to resolve from another core's L2 or L3 on the same chip." - }, - { - "EventCode": "1E05A", - "EventName": "PM_CMPL_STALL_LWSYNC", - "BriefDescription": "Cycles in which the oldest instruction in the pipeline was a lwsync waiting to complete." - }, - { - "EventCode": "1F056", + "EventCode": "0x1F056", "EventName": "PM_DISP_SS0_2_INSTR_CYC", "BriefDescription": "Cycles in which Superslice 0 dispatches either 1 or 2 instructions." }, { - "EventCode": "1F15C", + "EventCode": "0x1F15C", "EventName": "PM_MRK_STCX_L2_CYC", "BriefDescription": "Cycles spent in the nest portion of a marked Stcx instruction. It starts counting when the operation starts to drain to the L2 and it stops counting when the instruction retires from the Instruction Completion Table (ICT) in the Instruction Sequencing Unit (ISU)." }, { - "EventCode": "10066", + "EventCode": "0x10066", "EventName": "PM_ADJUNCT_CYC", "BriefDescription": "Cycles in which the thread is in Adjunct state. MSR[S HV PR] bits = 011." }, { - "EventCode": "101E4", + "EventCode": "0x101E4", "EventName": "PM_MRK_L1_ICACHE_MISS", "BriefDescription": "Marked Instruction suffered an icache Miss." }, { - "EventCode": "101EA", + "EventCode": "0x101EA", "EventName": "PM_MRK_L1_RELOAD_VALID", "BriefDescription": "Marked demand reload." }, { - "EventCode": "100F4", + "EventCode": "0x100F4", "EventName": "PM_FLOP_CMPL", "BriefDescription": "Floating Point Operations Completed. Includes any type. It counts once for each 1, 2, 4 or 8 flop instruction. Use PM_1|2|4|8_FLOP_CMPL events to count flops." }, { - "EventCode": "100FA", + "EventCode": "0x100FA", "EventName": "PM_RUN_LATCH_ANY_THREAD_CYC", "BriefDescription": "Cycles when at least one thread has the run latch set." }, { - "EventCode": "100FC", + "EventCode": "0x100FC", "EventName": "PM_LD_REF_L1", "BriefDescription": "All L1 D cache load references counted at finish, gated by reject. In P9 and earlier this event counted only cacheable loads but in P10 both cacheable and non-cacheable loads are included." }, { - "EventCode": "20006", - "EventName": "PM_DISP_STALL_HELD_ISSQ_FULL_CYC", - "BriefDescription": "Cycles in which the NTC instruction is held at dispatch due to Issue queue full. Includes issue queue and branch queue." - }, - { - "EventCode": "2000C", + "EventCode": "0x2000C", "EventName": "PM_RUN_LATCH_ALL_THREADS_CYC", "BriefDescription": "Cycles when the run latch is set for all threads." }, { - "EventCode": "2E010", + "EventCode": "0x2E010", "EventName": "PM_ADJUNCT_INST_CMPL", "BriefDescription": "PowerPC instructions that completed while the thread is in Adjunct state." }, { - "EventCode": "2E014", + "EventCode": "0x2E014", "EventName": "PM_STCX_FIN", "BriefDescription": "Conditional store instruction (STCX) finished. LARX and STCX are instructions used to acquire a lock." }, { - "EventCode": "20130", + "EventCode": "0x20130", "EventName": "PM_MRK_INST_DECODED", "BriefDescription": "An instruction was marked at decode time. Random Instruction Sampling (RIS) only." }, { - "EventCode": "20132", + "EventCode": "0x20132", "EventName": "PM_MRK_DFU_ISSUE", "BriefDescription": "The marked instruction was a decimal floating point operation issued to the VSU. Measured at issue time." }, { - "EventCode": "20134", + "EventCode": "0x20134", "EventName": "PM_MRK_FXU_ISSUE", "BriefDescription": "The marked instruction was a fixed point operation issued to the VSU. Measured at issue time." }, { - "EventCode": "2505C", + "EventCode": "0x2505C", "EventName": "PM_VSU_ISSUE", "BriefDescription": "At least one VSU instruction was issued to one of the VSU pipes. Up to 4 per cycle. Includes fixed point operations." }, { - "EventCode": "2F054", + "EventCode": "0x2F054", "EventName": "PM_DISP_SS1_2_INSTR_CYC", "BriefDescription": "Cycles in which Superslice 1 dispatches either 1 or 2 instructions." }, { - "EventCode": "2F056", + "EventCode": "0x2F056", "EventName": "PM_DISP_SS1_4_INSTR_CYC", "BriefDescription": "Cycles in which Superslice 1 dispatches either 3 or 4 instructions." }, { - "EventCode": "2006C", + "EventCode": "0x2006C", "EventName": "PM_RUN_CYC_SMT4_MODE", "BriefDescription": "Cycles when this thread's run latch is set and the core is in SMT4 mode." }, { - "EventCode": "201E0", + "EventCode": "0x201E0", "EventName": "PM_MRK_DATA_FROM_MEMORY", "BriefDescription": "The processor's data cache was reloaded from local, remote, or distant memory due to a demand miss for a marked load." }, { - "EventCode": "201E4", + "EventCode": "0x201E4", "EventName": "PM_MRK_DATA_FROM_L3MISS", "BriefDescription": "The processor's data cache was reloaded from a source other than the local core's L1, L2, or L3 due to a demand miss for a marked load." }, { - "EventCode": "201E8", + "EventCode": "0x201E8", "EventName": "PM_THRESH_EXC_512", "BriefDescription": "Threshold counter exceeded a value of 512." }, { - "EventCode": "200F2", + "EventCode": "0x200F2", "EventName": "PM_INST_DISP", "BriefDescription": "PowerPC instructions dispatched." }, { - "EventCode": "30132", + "EventCode": "0x30132", "EventName": "PM_MRK_VSU_FIN", "BriefDescription": "VSU marked instructions finished. Excludes simple FX instructions issued to the Store Unit." }, { - "EventCode": "30038", + "EventCode": "0x30038", "EventName": "PM_EXEC_STALL_DMISS_LMEM", "BriefDescription": "Cycles in which the oldest instruction in the pipeline was waiting for a load miss to resolve from the local memory, local OpenCapp cache, or local OpenCapp memory." }, { - "EventCode": "3F04A", + "EventCode": "0x3F04A", "EventName": "PM_LSU_ST5_FIN", "BriefDescription": "LSU Finished an internal operation in ST2 port." }, { - "EventCode": "34054", - "EventName": "PM_EXEC_STALL_DMISS_L2L3_NOCONFLICT", - "BriefDescription": "Cycles in which the oldest instruction in the pipeline was waiting for a load miss to resolve from the local L2 or local L3, without a dispatch conflict." - }, - { - "EventCode": "3405A", + "EventCode": "0x3405A", "EventName": "PM_PRIVILEGED_INST_CMPL", "BriefDescription": "PowerPC Instructions that completed while the thread is in Privileged state." }, { - "EventCode": "3F150", + "EventCode": "0x3F150", "EventName": "PM_MRK_ST_DRAIN_CYC", "BriefDescription": "cycles to drain st from core to L2." }, { - "EventCode": "3F054", + "EventCode": "0x3F054", "EventName": "PM_DISP_SS0_4_INSTR_CYC", "BriefDescription": "Cycles in which Superslice 0 dispatches either 3 or 4 instructions." }, { - "EventCode": "3F056", + "EventCode": "0x3F056", "EventName": "PM_DISP_SS0_8_INSTR_CYC", "BriefDescription": "Cycles in which Superslice 0 dispatches either 5, 6, 7 or 8 instructions." }, { - "EventCode": "30162", + "EventCode": "0x30162", "EventName": "PM_MRK_ISSUE_DEPENDENT_LOAD", "BriefDescription": "The marked instruction was dependent on a load. It is eligible for issue kill." }, { - "EventCode": "40114", + "EventCode": "0x40114", "EventName": "PM_MRK_START_PROBE_NOP_DISP", "BriefDescription": "Marked Start probe nop dispatched. Instruction AND R0,R0,R0." }, { - "EventCode": "4001C", + "EventCode": "0x4001C", "EventName": "PM_VSU_FIN", "BriefDescription": "VSU instructions finished." }, { - "EventCode": "4C01A", + "EventCode": "0x4C01A", "EventName": "PM_EXEC_STALL_DMISS_OFF_NODE", "BriefDescription": "Cycles in which the oldest instruction in the pipeline was waiting for a load miss to resolve from a distant chip." }, { - "EventCode": "4D012", + "EventCode": "0x4D012", "EventName": "PM_PMC3_SAVED", "BriefDescription": "The conditions for the speculative event selected for PMC3 are met and PMC3 is charged." }, { - "EventCode": "4D022", + "EventCode": "0x4D022", "EventName": "PM_HYPERVISOR_INST_CMPL", "BriefDescription": "PowerPC instructions that completed while the thread is in hypervisor state." }, { - "EventCode": "4D026", + "EventCode": "0x4D026", "EventName": "PM_ULTRAVISOR_CYC", "BriefDescription": "Cycles when the thread is in Ultravisor state. MSR[S HV PR]=110." }, { - "EventCode": "4D028", + "EventCode": "0x4D028", "EventName": "PM_PRIVILEGED_CYC", "BriefDescription": "Cycles when the thread is in Privileged state. MSR[S HV PR]=x00." }, { - "EventCode": "40030", + "EventCode": "0x40030", "EventName": "PM_INST_FIN", "BriefDescription": "Instructions finished." }, { - "EventCode": "44146", + "EventCode": "0x44146", "EventName": "PM_MRK_STCX_CORE_CYC", "BriefDescription": "Cycles spent in the core portion of a marked Stcx instruction. It starts counting when the instruction is decoded and stops counting when it drains into the L2." }, { - "EventCode": "44054", + "EventCode": "0x44054", "EventName": "PM_VECTOR_LD_CMPL", "BriefDescription": "Vector load instructions completed." }, { - "EventCode": "45054", + "EventCode": "0x45054", "EventName": "PM_FMA_CMPL", "BriefDescription": "Two floating point instructions completed (FMA class of instructions: fmadd, fnmadd, fmsub, fnmsub). Scalar instructions only." }, { - "EventCode": "45056", + "EventCode": "0x45056", "EventName": "PM_SCALAR_FLOP_CMPL", "BriefDescription": "Scalar floating point instructions completed." }, { - "EventCode": "4505C", + "EventCode": "0x4505C", "EventName": "PM_MATH_FLOP_CMPL", "BriefDescription": "Math floating point instructions completed." }, { - "EventCode": "4D05E", + "EventCode": "0x4D05E", "EventName": "PM_BR_CMPL", "BriefDescription": "A branch completed. All branches are included." }, { - "EventCode": "4E15E", + "EventCode": "0x4E15E", "EventName": "PM_MRK_INST_FLUSHED", "BriefDescription": "The marked instruction was flushed." }, { - "EventCode": "401E6", + "EventCode": "0x401E6", "EventName": "PM_MRK_INST_FROM_L3MISS", "BriefDescription": "The processor's instruction cache was reloaded from a source other than the local core's L1, L2, or L3 due to a demand miss for a marked instruction." }, { - "EventCode": "401E8", + "EventCode": "0x401E8", "EventName": "PM_MRK_DATA_FROM_L2MISS", "BriefDescription": "The processor's data cache was reloaded from a source other than the local core's L1 or L2 due to a demand miss for a marked load." }, { - "EventCode": "400F0", + "EventCode": "0x400F0", "EventName": "PM_LD_DEMAND_MISS_L1_FIN", "BriefDescription": "Load Missed L1, counted at finish time." }, { - "EventCode": "400FA", + "EventCode": "0x400FA", "EventName": "PM_RUN_INST_CMPL", "BriefDescription": "Completed PowerPC instructions gated by the run latch." } diff --git a/tools/perf/pmu-events/arch/powerpc/power10/pipeline.json b/tools/perf/pmu-events/arch/powerpc/power10/pipeline.json index b61b5cc157ee..b8aded6045fa 100644 --- a/tools/perf/pmu-events/arch/powerpc/power10/pipeline.json +++ b/tools/perf/pmu-events/arch/powerpc/power10/pipeline.json @@ -1,296 +1,291 @@ [ { - "EventCode": "100FE", + "EventCode": "0x100FE", "EventName": "PM_INST_CMPL", "BriefDescription": "PowerPC instructions completed." }, { - "EventCode": "10006", - "EventName": "PM_DISP_STALL_HELD_OTHER_CYC", - "BriefDescription": "Cycles in which the NTC instruction is held at dispatch for any other reason." - }, - { - "EventCode": "1000C", + "EventCode": "0x1000C", "EventName": "PM_LSU_LD0_FIN", "BriefDescription": "LSU Finished an internal operation in LD0 port." }, { - "EventCode": "1000E", + "EventCode": "0x1000E", "EventName": "PM_MMA_ISSUED", "BriefDescription": "MMA instructions issued." }, { - "EventCode": "10012", + "EventCode": "0x10012", "EventName": "PM_LSU_ST0_FIN", "BriefDescription": "LSU Finished an internal operation in ST0 port." }, { - "EventCode": "10014", + "EventCode": "0x10014", "EventName": "PM_LSU_ST4_FIN", "BriefDescription": "LSU Finished an internal operation in ST4 port." }, { - "EventCode": "10018", + "EventCode": "0x10018", "EventName": "PM_IC_DEMAND_CYC", "BriefDescription": "Cycles in which an instruction reload is pending to satisfy a demand miss." }, { - "EventCode": "10022", + "EventCode": "0x10022", "EventName": "PM_PMC2_SAVED", "BriefDescription": "The conditions for the speculative event selected for PMC2 are met and PMC2 is charged." }, { - "EventCode": "10024", + "EventCode": "0x10024", "EventName": "PM_PMC5_OVERFLOW", "BriefDescription": "The event selected for PMC5 caused the event counter to overflow." }, { - "EventCode": "10058", + "EventCode": "0x10058", "EventName": "PM_EXEC_STALL_FIN_AT_DISP", "BriefDescription": "Cycles in which the oldest instruction in the pipeline finished at dispatch and did not require execution in the LSU, BRU or VSU." }, { - "EventCode": "1005A", + "EventCode": "0x1005A", "EventName": "PM_FLUSH_MPRED", "BriefDescription": "A flush occurred due to a mispredicted branch. Includes target and direction." }, { - "EventCode": "1C05A", + "EventCode": "0x1C05A", "EventName": "PM_DERAT_MISS_2M", "BriefDescription": "Data ERAT Miss (Data TLB Access) page size 2M. Implies radix translation. When MMCR1[16]=0 this event counts only DERAT reloads for demand misses. When MMCR1[16]=1 this event includes demand misses and prefetches." }, { - "EventCode": "10064", - "EventName": "PM_DISP_STALL_IC_L2", - "BriefDescription": "Cycles when dispatch was stalled while the instruction was fetched from the local L2." + "EventCode": "0x1E05A", + "EventName": "PM_CMPL_STALL_LWSYNC", + "BriefDescription": "Cycles in which the oldest instruction in the pipeline was a lwsync waiting to complete." }, { - "EventCode": "10068", + "EventCode": "0x10068", "EventName": "PM_BR_FIN", "BriefDescription": "A branch instruction finished. Includes predicted/mispredicted/unconditional." }, { - "EventCode": "1006A", + "EventCode": "0x1006A", "EventName": "PM_FX_LSU_FIN", "BriefDescription": "Simple fixed point instruction issued to the store unit. Measured at finish time." }, { - "EventCode": "1006C", + "EventCode": "0x1006C", "EventName": "PM_RUN_CYC_ST_MODE", "BriefDescription": "Cycles when the run latch is set and the core is in ST mode." }, { - "EventCode": "20004", + "EventCode": "0x20004", "EventName": "PM_ISSUE_STALL", "BriefDescription": "Cycles in which the oldest instruction in the pipeline was dispatched but not issued yet." }, { - "EventCode": "2000A", + "EventCode": "0x2000A", "EventName": "PM_HYPERVISOR_CYC", "BriefDescription": "Cycles when the thread is in Hypervisor state. MSR[S HV PR]=010." }, { - "EventCode": "2000E", + "EventCode": "0x2000E", "EventName": "PM_LSU_LD1_FIN", "BriefDescription": "LSU Finished an internal operation in LD1 port." }, { - "EventCode": "2C014", + "EventCode": "0x2C014", "EventName": "PM_CMPL_STALL_SPECIAL", "BriefDescription": "Cycles in which the oldest instruction in the pipeline required special handling before completing." }, { - "EventCode": "2C018", + "EventCode": "0x2C018", "EventName": "PM_EXEC_STALL_DMISS_L3MISS", "BriefDescription": "Cycles in which the oldest instruction in the pipeline was waiting for a load miss to resolve from a source beyond the local L2 or local L3." }, { - "EventCode": "2D010", + "EventCode": "0x2D010", "EventName": "PM_LSU_ST1_FIN", "BriefDescription": "LSU Finished an internal operation in ST1 port." }, { - "EventCode": "2D012", + "EventCode": "0x2D012", "EventName": "PM_VSU1_ISSUE", "BriefDescription": "VSU instructions issued to VSU pipe 1." }, { - "EventCode": "2D018", + "EventCode": "0x2D018", "EventName": "PM_EXEC_STALL_VSU", "BriefDescription": "Cycles in which the oldest instruction in the pipeline was executing in the VSU (includes FXU, VSU, CRU)." }, { - "EventCode": "2E01E", + "EventCode": "0x2D01C", + "EventName": "PM_CMPL_STALL_STCX", + "BriefDescription": "Cycles in which the oldest instruction in the pipeline was a stcx waiting for resolution from the nest before completing." + }, + { + "EventCode": "0x2E01E", "EventName": "PM_EXEC_STALL_NTC_FLUSH", - "BriefDescription": "Cycles in which the oldest instruction in the pipeline was executing in any unit before it was flushed. Note that if the flush of the oldest instruction happens after finish, the cycles from dispatch to issue will be included in PM_DISP_STALL and the cycles from issue to finish will be included in PM_EXEC_STALL and its corresponding children." + "BriefDescription": "Cycles in which the oldest instruction in the pipeline was executing in any unit before it was flushed. Note that if the flush of the oldest instruction happens after finish, the cycles from dispatch to issue will be included in PM_DISP_STALL and the cycles from issue to finish will be included in PM_EXEC_STALL and its corresponding children. This event will also count cycles when the previous NTF instruction is still completing and the new NTF instruction is stalled at dispatch." }, { - "EventCode": "2013C", + "EventCode": "0x2013C", "EventName": "PM_MRK_FX_LSU_FIN", "BriefDescription": "The marked instruction was simple fixed point that was issued to the store unit. Measured at finish time." }, { - "EventCode": "2405A", + "EventCode": "0x2405A", "EventName": "PM_NTC_FIN", "BriefDescription": "Cycles in which the oldest instruction in the pipeline (NTC) finishes. Note that instructions can finish out of order, therefore not all the instructions that finish have a Next-to-complete status." }, { - "EventCode": "201E2", + "EventCode": "0x201E2", "EventName": "PM_MRK_LD_MISS_L1", "BriefDescription": "Marked DL1 Demand Miss counted at finish time." }, { - "EventCode": "200F4", + "EventCode": "0x200F4", "EventName": "PM_RUN_CYC", "BriefDescription": "Processor cycles gated by the run latch." }, { - "EventCode": "30004", - "EventName": "PM_DISP_STALL_FLUSH", - "BriefDescription": "Cycles when dispatch was stalled because of a flush that happened to an instruction(s) that was not yet NTC. PM_EXEC_STALL_NTC_FLUSH only includes instructions that were flushed after becoming NTC." - }, - { - "EventCode": "30008", + "EventCode": "0x30008", "EventName": "PM_EXEC_STALL", "BriefDescription": "Cycles in which the oldest instruction in the pipeline was waiting to finish in one of the execution units (BRU, LSU, VSU). Only cycles between issue and finish are counted in this category." }, { - "EventCode": "3001A", + "EventCode": "0x3001A", "EventName": "PM_LSU_ST2_FIN", "BriefDescription": "LSU Finished an internal operation in ST2 port." }, { - "EventCode": "30020", + "EventCode": "0x30020", "EventName": "PM_PMC2_REWIND", "BriefDescription": "The speculative event selected for PMC2 rewinds and the counter for PMC2 is not charged." }, { - "EventCode": "30022", + "EventCode": "0x30022", "EventName": "PM_PMC4_SAVED", "BriefDescription": "The conditions for the speculative event selected for PMC4 are met and PMC4 is charged." }, { - "EventCode": "30024", + "EventCode": "0x30024", "EventName": "PM_PMC6_OVERFLOW", "BriefDescription": "The event selected for PMC6 caused the event counter to overflow." }, { - "EventCode": "30028", + "EventCode": "0x30028", "EventName": "PM_CMPL_STALL_MEM_ECC", "BriefDescription": "Cycles in which the oldest instruction in the pipeline was waiting for the non-speculative finish of either a stcx waiting for its result or a load waiting for non-critical sectors of data and ECC." }, { - "EventCode": "30036", + "EventCode": "0x30036", "EventName": "PM_EXEC_STALL_SIMPLE_FX", "BriefDescription": "Cycles in which the oldest instruction in the pipeline was a simple fixed point instruction executing in the Load Store Unit." }, { - "EventCode": "3003A", + "EventCode": "0x3003A", "EventName": "PM_CMPL_STALL_EXCEPTION", "BriefDescription": "Cycles in which the oldest instruction in the pipeline was not allowed to complete because it was interrupted by ANY exception, which has to be serviced before the instruction can complete." }, { - "EventCode": "3F044", + "EventCode": "0x3F044", "EventName": "PM_VSU2_ISSUE", "BriefDescription": "VSU instructions issued to VSU pipe 2." }, { - "EventCode": "30058", + "EventCode": "0x30058", "EventName": "PM_TLBIE_FIN", "BriefDescription": "TLBIE instructions finished in the LSU. Two TLBIEs can finish each cycle. All will be counted." }, { - "EventCode": "3D058", + "EventCode": "0x3D058", "EventName": "PM_SCALAR_FSQRT_FDIV_ISSUE", "BriefDescription": "Scalar versions of four floating point operations: fdiv,fsqrt (xvdivdp, xvdivsp, xvsqrtdp, xvsqrtsp)." }, { - "EventCode": "30066", + "EventCode": "0x30066", "EventName": "PM_LSU_FIN", "BriefDescription": "LSU Finished an internal operation (up to 4 per cycle)." }, { - "EventCode": "40004", + "EventCode": "0x40004", "EventName": "PM_FXU_ISSUE", "BriefDescription": "A fixed point instruction was issued to the VSU." }, { - "EventCode": "40008", + "EventCode": "0x40008", "EventName": "PM_NTC_ALL_FIN", "BriefDescription": "Cycles in which both instructions in the ICT entry pair show as finished. These are the cycles between finish and completion for the oldest pair of instructions in the pipeline." }, { - "EventCode": "40010", + "EventCode": "0x40010", "EventName": "PM_PMC3_OVERFLOW", "BriefDescription": "The event selected for PMC3 caused the event counter to overflow." }, { - "EventCode": "4C012", + "EventCode": "0x4C012", "EventName": "PM_EXEC_STALL_DERAT_ONLY_MISS", "BriefDescription": "Cycles in which the oldest instruction in the pipeline suffered an ERAT miss and waited for it resolve." }, { - "EventCode": "4C018", + "EventCode": "0x4C018", "EventName": "PM_CMPL_STALL", "BriefDescription": "Cycles in which the oldest instruction in the pipeline cannot complete because the thread was blocked for any reason." }, { - "EventCode": "4C01E", + "EventCode": "0x4C01E", "EventName": "PM_LSU_ST3_FIN", "BriefDescription": "LSU Finished an internal operation in ST3 port." }, { - "EventCode": "4D018", + "EventCode": "0x4D018", "EventName": "PM_EXEC_STALL_BRU", "BriefDescription": "Cycles in which the oldest instruction in the pipeline was executing in the Branch unit." }, { - "EventCode": "4D01A", + "EventCode": "0x4D01A", "EventName": "PM_CMPL_STALL_HWSYNC", "BriefDescription": "Cycles in which the oldest instruction in the pipeline was a hwsync waiting for response from L2 before completing." }, { - "EventCode": "4D01C", + "EventCode": "0x4D01C", "EventName": "PM_EXEC_STALL_TLBIEL", "BriefDescription": "Cycles in which the oldest instruction in the pipeline was a TLBIEL instruction executing in the Load Store Unit. TLBIEL instructions have lower overhead than TLBIE instructions because they don't get set to the nest." }, { - "EventCode": "4E012", + "EventCode": "0x4E012", "EventName": "PM_EXEC_STALL_UNKNOWN", "BriefDescription": "Cycles in which the oldest instruction in the pipeline completed without an ntf_type pulse. The ntf_pulse was missed by the ISU because the NTF finishes and completions came too close together." }, { - "EventCode": "4D020", + "EventCode": "0x4D020", "EventName": "PM_VSU3_ISSUE", "BriefDescription": "VSU instruction was issued to VSU pipe 3." }, { - "EventCode": "40132", + "EventCode": "0x40132", "EventName": "PM_MRK_LSU_FIN", "BriefDescription": "LSU marked instruction finish." }, { - "EventCode": "45058", + "EventCode": "0x45058", "EventName": "PM_IC_MISS_CMPL", "BriefDescription": "Non-speculative icache miss, counted at completion." }, { - "EventCode": "4D050", + "EventCode": "0x4D050", "EventName": "PM_VSU_NON_FLOP_CMPL", "BriefDescription": "Non-floating point VSU instructions completed." }, { - "EventCode": "4D052", + "EventCode": "0x4D052", "EventName": "PM_2FLOP_CMPL", "BriefDescription": "Double Precision vector version of fmul, fsub, fcmp, fsel, fabs, fnabs, fres, fsqrte, fneg completed." }, { - "EventCode": "400F2", + "EventCode": "0x400F2", "EventName": "PM_1PLUS_PPC_DISP", "BriefDescription": "Cycles at least one Instr Dispatched." }, { - "EventCode": "400F8", + "EventCode": "0x400F8", "EventName": "PM_FLUSH", "BriefDescription": "Flush (any type)." } diff --git a/tools/perf/pmu-events/arch/powerpc/power10/pmc.json b/tools/perf/pmu-events/arch/powerpc/power10/pmc.json index ea122a91ceb0..b5d1bd39cfb2 100644 --- a/tools/perf/pmu-events/arch/powerpc/power10/pmc.json +++ b/tools/perf/pmu-events/arch/powerpc/power10/pmc.json @@ -1,21 +1,21 @@ [ { - "EventCode": "301E8", + "EventCode": "0x301E8", "EventName": "PM_THRESH_EXC_64", "BriefDescription": "Threshold counter exceeded a value of 64." }, { - "EventCode": "45050", + "EventCode": "0x45050", "EventName": "PM_1FLOP_CMPL", "BriefDescription": "One floating point instruction completed (fadd, fmul, fsub, fcmp, fsel, fabs, fnabs, fres, fsqrte, fneg)." }, { - "EventCode": "45052", + "EventCode": "0x45052", "EventName": "PM_4FLOP_CMPL", "BriefDescription": "Four floating point instructions completed (fadd, fmul, fsub, fcmp, fsel, fabs, fnabs, fres, fsqrte, fneg)." }, { - "EventCode": "4D054", + "EventCode": "0x4D054", "EventName": "PM_8FLOP_CMPL", "BriefDescription": "Four Double Precision vector instructions completed." } diff --git a/tools/perf/pmu-events/arch/powerpc/power10/translation.json b/tools/perf/pmu-events/arch/powerpc/power10/translation.json index 5a714e3dd71a..db3766dca07c 100644 --- a/tools/perf/pmu-events/arch/powerpc/power10/translation.json +++ b/tools/perf/pmu-events/arch/powerpc/power10/translation.json @@ -1,56 +1,56 @@ [ { - "EventCode": "1F15E", + "EventCode": "0x1F15E", "EventName": "PM_MRK_START_PROBE_NOP_CMPL", "BriefDescription": "Marked Start probe nop (AND R0,R0,R0) completed." }, { - "EventCode": "20016", + "EventCode": "0x20016", "EventName": "PM_ST_FIN", "BriefDescription": "Store finish count. Includes speculative activity." }, { - "EventCode": "20018", + "EventCode": "0x20018", "EventName": "PM_ST_FWD", "BriefDescription": "Store forwards that finished." }, { - "EventCode": "2011C", + "EventCode": "0x2011C", "EventName": "PM_MRK_NTF_CYC", "BriefDescription": "Cycles during which the marked instruction is the oldest in the pipeline (NTF or NTC)." }, { - "EventCode": "2E01C", + "EventCode": "0x2E01C", "EventName": "PM_EXEC_STALL_TLBIE", "BriefDescription": "Cycles in which the oldest instruction in the pipeline was a TLBIE instruction executing in the Load Store Unit." }, { - "EventCode": "201E6", + "EventCode": "0x201E6", "EventName": "PM_THRESH_EXC_32", "BriefDescription": "Threshold counter exceeded a value of 32." }, { - "EventCode": "200F0", + "EventCode": "0x200F0", "EventName": "PM_ST_CMPL", "BriefDescription": "Stores completed from S2Q (2nd-level store queue). This event includes regular stores, stcx and cache inhibited stores. The following operations are excluded (pteupdate, snoop tlbie complete, store atomics, miso, load atomic payloads, tlbie, tlbsync, slbieg, isync, msgsnd, slbiag, cpabort, copy, tcheck, tend, stsync, dcbst, icbi, dcbf, hwsync, lwsync, ptesync, eieio, msgsync)." }, { - "EventCode": "200FE", + "EventCode": "0x200FE", "EventName": "PM_DATA_FROM_L2MISS", "BriefDescription": "The processor's data cache was reloaded from a source other than the local core's L1 or L2 due to a demand miss." }, { - "EventCode": "30010", + "EventCode": "0x30010", "EventName": "PM_PMC2_OVERFLOW", "BriefDescription": "The event selected for PMC2 caused the event counter to overflow." }, { - "EventCode": "4D010", + "EventCode": "0x4D010", "EventName": "PM_PMC1_SAVED", "BriefDescription": "The conditions for the speculative event selected for PMC1 are met and PMC1 is charged." }, { - "EventCode": "4D05C", + "EventCode": "0x4D05C", "EventName": "PM_DPP_FLOP_CMPL", "BriefDescription": "Double-Precision or Quad-Precision instructions completed." } diff --git a/tools/perf/pmu-events/jevents.c b/tools/perf/pmu-events/jevents.c index ed4f0bd72e5a..9604446f8360 100644 --- a/tools/perf/pmu-events/jevents.c +++ b/tools/perf/pmu-events/jevents.c @@ -960,7 +960,7 @@ static int get_maxfds(void) struct rlimit rlim; if (getrlimit(RLIMIT_NOFILE, &rlim) == 0) - return min((int)rlim.rlim_max / 2, 512); + return min(rlim.rlim_max / 2, (rlim_t)512); return 512; } @@ -1123,8 +1123,10 @@ static int process_one_file(const char *fpath, const struct stat *sb, mapfile = strdup(fpath); return 0; } - - pr_info("%s: Ignoring file %s\n", prog, fpath); + if (is_json_file(bname)) + pr_debug("%s: ArchStd json is preprocessed %s\n", prog, fpath); + else + pr_info("%s: Ignoring file %s\n", prog, fpath); return 0; } diff --git a/tools/perf/scripts/python/exported-sql-viewer.py b/tools/perf/scripts/python/exported-sql-viewer.py index 7daa8bb70a5a..711d4f9f5645 100755 --- a/tools/perf/scripts/python/exported-sql-viewer.py +++ b/tools/perf/scripts/python/exported-sql-viewer.py @@ -91,6 +91,11 @@ from __future__ import print_function import sys +# Only change warnings if the python -W option was not used +if not sys.warnoptions: + import warnings + # PySide2 causes deprecation warnings, ignore them. + warnings.filterwarnings("ignore", category=DeprecationWarning) import argparse import weakref import threading @@ -125,8 +130,9 @@ if pyside_version_1: from PySide.QtGui import * from PySide.QtSql import * -from decimal import * -from ctypes import * +from decimal import Decimal, ROUND_HALF_UP +from ctypes import CDLL, Structure, create_string_buffer, addressof, sizeof, \ + c_void_p, c_bool, c_byte, c_char, c_int, c_uint, c_longlong, c_ulonglong from multiprocessing import Process, Array, Value, Event # xrange is range in Python3 @@ -3868,7 +3874,7 @@ def CopyTableCellsToClipboard(view, as_csv=False, with_hdr=False): if with_hdr: model = indexes[0].model() for col in range(min_col, max_col + 1): - val = model.headerData(col, Qt.Horizontal) + val = model.headerData(col, Qt.Horizontal, Qt.DisplayRole) if as_csv: text += sep + ToCSValue(val) sep = "," diff --git a/tools/perf/tests/attr/base-record b/tools/perf/tests/attr/base-record index 645009c08b3c..8c10955eff93 100644 --- a/tools/perf/tests/attr/base-record +++ b/tools/perf/tests/attr/base-record @@ -5,7 +5,7 @@ group_fd=-1 flags=0|8 cpu=* type=0|1 -size=120 +size=128 config=0 sample_period=* sample_type=263 @@ -16,7 +16,7 @@ pinned=0 exclusive=0 exclude_user=0 exclude_kernel=0|1 -exclude_hv=0 +exclude_hv=0|1 exclude_idle=0 mmap=1 comm=1 diff --git a/tools/perf/tests/attr/base-stat b/tools/perf/tests/attr/base-stat index b0f42c34882e..408164456530 100644 --- a/tools/perf/tests/attr/base-stat +++ b/tools/perf/tests/attr/base-stat @@ -5,7 +5,7 @@ group_fd=-1 flags=0|8 cpu=* type=0 -size=120 +size=128 config=0 sample_period=0 sample_type=65536 diff --git a/tools/perf/tests/attr/system-wide-dummy b/tools/perf/tests/attr/system-wide-dummy index eba723cc0d38..86a15dd359d9 100644 --- a/tools/perf/tests/attr/system-wide-dummy +++ b/tools/perf/tests/attr/system-wide-dummy @@ -7,7 +7,7 @@ cpu=* pid=-1 flags=8 type=1 -size=120 +size=128 config=9 sample_period=4000 sample_type=455 diff --git a/tools/perf/tests/pfm.c b/tools/perf/tests/pfm.c index 76a53126efdf..d4b0ef74defc 100644 --- a/tools/perf/tests/pfm.c +++ b/tools/perf/tests/pfm.c @@ -131,8 +131,8 @@ static int test__pfm_group(void) }, { .events = "{},{instructions}", - .nr_events = 0, - .nr_groups = 0, + .nr_events = 1, + .nr_groups = 1, }, { .events = "{instructions},{instructions}", diff --git a/tools/perf/util/Build b/tools/perf/util/Build index 8c0d9f368ebc..b64bdc1a7026 100644 --- a/tools/perf/util/Build +++ b/tools/perf/util/Build @@ -145,7 +145,14 @@ perf-$(CONFIG_LIBELF) += symbol-elf.o perf-$(CONFIG_LIBELF) += probe-file.o perf-$(CONFIG_LIBELF) += probe-event.o +ifdef CONFIG_LIBBPF_DYNAMIC + hashmap := 1 +endif ifndef CONFIG_LIBBPF + hashmap := 1 +endif + +ifdef hashmap perf-y += hashmap.o endif diff --git a/tools/perf/util/bpf_counter.c b/tools/perf/util/bpf_counter.c index ddb52f748c8e..5ed674a2f55e 100644 --- a/tools/perf/util/bpf_counter.c +++ b/tools/perf/util/bpf_counter.c @@ -451,10 +451,10 @@ static int bperf_reload_leader_program(struct evsel *evsel, int attr_map_fd, goto out; } - err = -1; link = bpf_program__attach(skel->progs.on_switch); - if (!link) { + if (IS_ERR(link)) { pr_err("Failed to attach leader program\n"); + err = PTR_ERR(link); goto out; } @@ -521,9 +521,10 @@ static int bperf__load(struct evsel *evsel, struct target *target) evsel->bperf_leader_link_fd = bpf_link_get_fd_by_id(entry.link_id); if (evsel->bperf_leader_link_fd < 0 && - bperf_reload_leader_program(evsel, attr_map_fd, &entry)) + bperf_reload_leader_program(evsel, attr_map_fd, &entry)) { + err = -1; goto out; - + } /* * The bpf_link holds reference to the leader program, and the * leader program holds reference to the maps. Therefore, if @@ -550,6 +551,7 @@ static int bperf__load(struct evsel *evsel, struct target *target) /* Step 2: load the follower skeleton */ evsel->follower_skel = bperf_follower_bpf__open(); if (!evsel->follower_skel) { + err = -1; pr_err("Failed to open follower skeleton\n"); goto out; } diff --git a/tools/perf/util/dwarf-aux.c b/tools/perf/util/dwarf-aux.c index b2f4920e19a6..7d2ba8419b0c 100644 --- a/tools/perf/util/dwarf-aux.c +++ b/tools/perf/util/dwarf-aux.c @@ -975,9 +975,13 @@ static int __die_find_variable_cb(Dwarf_Die *die_mem, void *data) if ((tag == DW_TAG_formal_parameter || tag == DW_TAG_variable) && die_compare_name(die_mem, fvp->name) && - /* Does the DIE have location information or external instance? */ + /* + * Does the DIE have location information or const value + * or external instance? + */ (dwarf_attr(die_mem, DW_AT_external, &attr) || - dwarf_attr(die_mem, DW_AT_location, &attr))) + dwarf_attr(die_mem, DW_AT_location, &attr) || + dwarf_attr(die_mem, DW_AT_const_value, &attr))) return DIE_FIND_CB_END; if (dwarf_haspc(die_mem, fvp->addr)) return DIE_FIND_CB_CONTINUE; diff --git a/tools/perf/util/env.c b/tools/perf/util/env.c index 9130f6fad8d5..bc5e4f294e9e 100644 --- a/tools/perf/util/env.c +++ b/tools/perf/util/env.c @@ -144,6 +144,7 @@ static void perf_env__purge_bpf(struct perf_env *env) node = rb_entry(next, struct bpf_prog_info_node, rb_node); next = rb_next(&node->rb_node); rb_erase(&node->rb_node, root); + free(node->info_linear); free(node); } diff --git a/tools/perf/util/event.h b/tools/perf/util/event.h index 8a62fb39e365..19ad64f2bd83 100644 --- a/tools/perf/util/event.h +++ b/tools/perf/util/event.h @@ -100,7 +100,7 @@ enum { PERF_IP_FLAG_VMEXIT = 1ULL << 12, }; -#define PERF_IP_FLAG_CHARS "bcrosyiABEx" +#define PERF_IP_FLAG_CHARS "bcrosyiABExgh" #define PERF_BRANCH_MASK (\ PERF_IP_FLAG_BRANCH |\ diff --git a/tools/perf/util/evlist.c b/tools/perf/util/evlist.c index 6e5c41528c7d..6ea3e677dc1e 100644 --- a/tools/perf/util/evlist.c +++ b/tools/perf/util/evlist.c @@ -425,9 +425,6 @@ static void __evlist__disable(struct evlist *evlist, char *evsel_name) if (affinity__setup(&affinity) < 0) return; - evlist__for_each_entry(evlist, pos) - bpf_counter__disable(pos); - /* Disable 'immediate' events last */ for (imm = 0; imm <= 1; imm++) { evlist__for_each_cpu(evlist, i, cpu) { diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c index 4a3cd1b5bb33..a8d8463f8ee5 100644 --- a/tools/perf/util/evsel.c +++ b/tools/perf/util/evsel.c @@ -428,6 +428,7 @@ struct evsel *evsel__clone(struct evsel *orig) evsel->auto_merge_stats = orig->auto_merge_stats; evsel->collect_stat = orig->collect_stat; evsel->weak_group = orig->weak_group; + evsel->use_config_name = orig->use_config_name; if (evsel__copy_config_terms(evsel, orig) < 0) goto out_err; diff --git a/tools/perf/util/evsel.h b/tools/perf/util/evsel.h index 75cf5dbfe208..bdad52a06438 100644 --- a/tools/perf/util/evsel.h +++ b/tools/perf/util/evsel.h @@ -83,8 +83,10 @@ struct evsel { bool collect_stat; bool weak_group; bool bpf_counter; + bool use_config_name; int bpf_fd; struct bpf_object *bpf_obj; + struct list_head config_terms; }; /* @@ -116,10 +118,8 @@ struct evsel { bool merged_stat; bool reset_group; bool errored; - bool use_config_name; struct hashmap *per_pkg_mask; struct evsel *leader; - struct list_head config_terms; int err; int cpu_iter; struct { diff --git a/tools/perf/util/intel-pt-decoder/intel-pt-decoder.c b/tools/perf/util/intel-pt-decoder/intel-pt-decoder.c index 8c59677bee13..20ad663978cc 100644 --- a/tools/perf/util/intel-pt-decoder/intel-pt-decoder.c +++ b/tools/perf/util/intel-pt-decoder/intel-pt-decoder.c @@ -1146,6 +1146,8 @@ static bool intel_pt_fup_event(struct intel_pt_decoder *decoder) decoder->set_fup_tx_flags = false; decoder->tx_flags = decoder->fup_tx_flags; decoder->state.type = INTEL_PT_TRANSACTION; + if (decoder->fup_tx_flags & INTEL_PT_ABORT_TX) + decoder->state.type |= INTEL_PT_BRANCH; decoder->state.from_ip = decoder->ip; decoder->state.to_ip = 0; decoder->state.flags = decoder->fup_tx_flags; @@ -1220,8 +1222,10 @@ static int intel_pt_walk_fup(struct intel_pt_decoder *decoder) return 0; if (err == -EAGAIN || intel_pt_fup_with_nlip(decoder, &intel_pt_insn, ip, err)) { + bool no_tip = decoder->pkt_state != INTEL_PT_STATE_FUP; + decoder->pkt_state = INTEL_PT_STATE_IN_SYNC; - if (intel_pt_fup_event(decoder)) + if (intel_pt_fup_event(decoder) && no_tip) return 0; return -EAGAIN; } diff --git a/tools/perf/util/intel-pt.c b/tools/perf/util/intel-pt.c index 8658d42ce57a..0dfec8761b9a 100644 --- a/tools/perf/util/intel-pt.c +++ b/tools/perf/util/intel-pt.c @@ -707,8 +707,10 @@ static int intel_pt_walk_next_insn(struct intel_pt_insn *intel_pt_insn, *ip += intel_pt_insn->length; - if (to_ip && *ip == to_ip) + if (to_ip && *ip == to_ip) { + intel_pt_insn->length = 0; goto out_no_cache; + } if (*ip >= al.map->end) break; @@ -1198,6 +1200,7 @@ static void intel_pt_set_pid_tid_cpu(struct intel_pt *pt, static void intel_pt_sample_flags(struct intel_pt_queue *ptq) { + ptq->insn_len = 0; if (ptq->state->flags & INTEL_PT_ABORT_TX) { ptq->flags = PERF_IP_FLAG_BRANCH | PERF_IP_FLAG_TX_ABORT; } else if (ptq->state->flags & INTEL_PT_ASYNC) { @@ -1211,7 +1214,6 @@ static void intel_pt_sample_flags(struct intel_pt_queue *ptq) ptq->flags = PERF_IP_FLAG_BRANCH | PERF_IP_FLAG_CALL | PERF_IP_FLAG_ASYNC | PERF_IP_FLAG_INTERRUPT; - ptq->insn_len = 0; } else { if (ptq->state->from_ip) ptq->flags = intel_pt_insn_type(ptq->state->insn_op); diff --git a/tools/perf/util/parse-events.c b/tools/perf/util/parse-events.c index 4dad14265b81..84108c17f48d 100644 --- a/tools/perf/util/parse-events.c +++ b/tools/perf/util/parse-events.c @@ -150,6 +150,10 @@ struct event_symbol event_symbols_sw[PERF_COUNT_SW_MAX] = { .symbol = "bpf-output", .alias = "", }, + [PERF_COUNT_SW_CGROUP_SWITCHES] = { + .symbol = "cgroup-switches", + .alias = "", + }, }; #define __PERF_EVENT_FIELD(config, name) \ @@ -2928,9 +2932,14 @@ restart: } for (i = 0; i < max; i++, syms++) { + /* + * New attr.config still not supported here, the latest + * example was PERF_COUNT_SW_CGROUP_SWITCHES + */ + if (syms->symbol == NULL) + continue; - if (event_glob != NULL && syms->symbol != NULL && - !(strglobmatch(syms->symbol, event_glob) || + if (event_glob != NULL && !(strglobmatch(syms->symbol, event_glob) || (syms->alias && strglobmatch(syms->alias, event_glob)))) continue; diff --git a/tools/perf/util/parse-events.l b/tools/perf/util/parse-events.l index fb8646cc3e83..923849024b15 100644 --- a/tools/perf/util/parse-events.l +++ b/tools/perf/util/parse-events.l @@ -347,6 +347,7 @@ emulation-faults { return sym(yyscanner, PERF_TYPE_SOFTWARE, PERF_COUNT_SW_EM dummy { return sym(yyscanner, PERF_TYPE_SOFTWARE, PERF_COUNT_SW_DUMMY); } duration_time { return tool(yyscanner, PERF_TOOL_DURATION_TIME); } bpf-output { return sym(yyscanner, PERF_TYPE_SOFTWARE, PERF_COUNT_SW_BPF_OUTPUT); } +cgroup-switches { return sym(yyscanner, PERF_TYPE_SOFTWARE, PERF_COUNT_SW_CGROUP_SWITCHES); } /* * We have to handle the kernel PMU event cycles-ct/cycles-t/mem-loads/mem-stores separately. diff --git a/tools/perf/util/perf_api_probe.c b/tools/perf/util/perf_api_probe.c index 829af17a0867..020411682a3c 100644 --- a/tools/perf/util/perf_api_probe.c +++ b/tools/perf/util/perf_api_probe.c @@ -103,6 +103,11 @@ static void perf_probe_build_id(struct evsel *evsel) evsel->core.attr.build_id = 1; } +static void perf_probe_cgroup(struct evsel *evsel) +{ + evsel->core.attr.cgroup = 1; +} + bool perf_can_sample_identifier(void) { return perf_probe_api(perf_probe_sample_identifier); @@ -182,3 +187,8 @@ bool perf_can_record_build_id(void) { return perf_probe_api(perf_probe_build_id); } + +bool perf_can_record_cgroup(void) +{ + return perf_probe_api(perf_probe_cgroup); +} diff --git a/tools/perf/util/perf_api_probe.h b/tools/perf/util/perf_api_probe.h index f12ca55f509a..b104168efb15 100644 --- a/tools/perf/util/perf_api_probe.h +++ b/tools/perf/util/perf_api_probe.h @@ -12,5 +12,6 @@ bool perf_can_record_switch_events(void); bool perf_can_record_text_poke_events(void); bool perf_can_sample_identifier(void); bool perf_can_record_build_id(void); +bool perf_can_record_cgroup(void); #endif // __PERF_API_PROBE_H diff --git a/tools/perf/util/pfm.c b/tools/perf/util/pfm.c index d735acb6c29c..6eef6dfeaa57 100644 --- a/tools/perf/util/pfm.c +++ b/tools/perf/util/pfm.c @@ -62,8 +62,16 @@ int parse_libpfm_events_option(const struct option *opt, const char *str, } /* no event */ - if (*q == '\0') + if (*q == '\0') { + if (*sep == '}') { + if (grp_evt < 0) { + ui__error("cannot close a non-existing event group\n"); + goto error; + } + grp_evt--; + } continue; + } memset(&attr, 0, sizeof(attr)); event_attr_init(&attr); @@ -107,6 +115,7 @@ int parse_libpfm_events_option(const struct option *opt, const char *str, grp_evt = -1; } } + free(p_orig); return 0; error: free(p_orig); diff --git a/tools/perf/util/probe-finder.c b/tools/perf/util/probe-finder.c index 866f2d514d72..b029c29ce227 100644 --- a/tools/perf/util/probe-finder.c +++ b/tools/perf/util/probe-finder.c @@ -190,6 +190,9 @@ static int convert_variable_location(Dwarf_Die *vr_die, Dwarf_Addr addr, immediate_value_is_supported()) { Dwarf_Sword snum; + if (!tvar) + return 0; + dwarf_formsdata(&attr, &snum); ret = asprintf(&tvar->value, "\\%ld", (long)snum); diff --git a/tools/perf/util/record.c b/tools/perf/util/record.c index f99852d54b14..43e5b563dee8 100644 --- a/tools/perf/util/record.c +++ b/tools/perf/util/record.c @@ -157,9 +157,15 @@ static int get_max_rate(unsigned int *rate) static int record_opts__config_freq(struct record_opts *opts) { bool user_freq = opts->user_freq != UINT_MAX; + bool user_interval = opts->user_interval != ULLONG_MAX; unsigned int max_rate; - if (opts->user_interval != ULLONG_MAX) + if (user_interval && user_freq) { + pr_err("cannot set frequency and period at the same time\n"); + return -1; + } + + if (user_interval) opts->default_interval = opts->user_interval; if (user_freq) opts->freq = opts->user_freq; diff --git a/tools/perf/util/session.c b/tools/perf/util/session.c index a12cf4f0e97a..e59242c361ce 100644 --- a/tools/perf/util/session.c +++ b/tools/perf/util/session.c @@ -904,7 +904,7 @@ static void perf_event__cpu_map_swap(union perf_event *event, struct perf_record_record_cpu_map *mask; unsigned i; - data->type = bswap_64(data->type); + data->type = bswap_16(data->type); switch (data->type) { case PERF_CPU_MAP__CPUS: @@ -937,7 +937,7 @@ static void perf_event__stat_config_swap(union perf_event *event, { u64 size; - size = event->stat_config.nr * sizeof(event->stat_config.data[0]); + size = bswap_64(event->stat_config.nr) * sizeof(event->stat_config.data[0]); size += 1; /* nr item itself */ mem_bswap_64(&event->stat_config.nr, size); } @@ -1723,6 +1723,7 @@ int perf_session__peek_event(struct perf_session *session, off_t file_offset, if (event->header.size < hdr_sz || event->header.size > buf_sz) return -1; + buf += hdr_sz; rest = event->header.size - hdr_sz; if (readn(fd, buf, rest) != (ssize_t)rest) diff --git a/tools/perf/util/stat-display.c b/tools/perf/util/stat-display.c index a76fff5e7d83..ca326f98c7a2 100644 --- a/tools/perf/util/stat-display.c +++ b/tools/perf/util/stat-display.c @@ -541,7 +541,7 @@ static void uniquify_event_name(struct evsel *counter) char *config; int ret = 0; - if (counter->uniquified_name || + if (counter->uniquified_name || counter->use_config_name || !counter->pmu_name || !strncmp(counter->name, counter->pmu_name, strlen(counter->pmu_name))) return; @@ -555,10 +555,8 @@ static void uniquify_event_name(struct evsel *counter) } } else { if (perf_pmu__has_hybrid()) { - if (!counter->use_config_name) { - ret = asprintf(&new_name, "%s/%s/", - counter->pmu_name, counter->name); - } + ret = asprintf(&new_name, "%s/%s/", + counter->pmu_name, counter->name); } else { ret = asprintf(&new_name, "%s [%s]", counter->name, counter->pmu_name); diff --git a/tools/perf/util/symbol-elf.c b/tools/perf/util/symbol-elf.c index 4c56aa837434..a73345730ba9 100644 --- a/tools/perf/util/symbol-elf.c +++ b/tools/perf/util/symbol-elf.c @@ -2412,6 +2412,7 @@ int cleanup_sdt_note_list(struct list_head *sdt_notes) list_for_each_entry_safe(pos, tmp, sdt_notes, note_list) { list_del_init(&pos->note_list); + zfree(&pos->args); zfree(&pos->name); zfree(&pos->provider); free(pos); diff --git a/tools/scripts/Makefile.include b/tools/scripts/Makefile.include index f9271f3ea912..071312f5eb92 100644 --- a/tools/scripts/Makefile.include +++ b/tools/scripts/Makefile.include @@ -131,29 +131,29 @@ QUIET_SUBDIR1 = ifneq ($(silent),1) ifneq ($(V),1) - QUIET_CC = @echo ' CC '$@; - QUIET_CC_FPIC = @echo ' CC FPIC '$@; - QUIET_CLANG = @echo ' CLANG '$@; - QUIET_AR = @echo ' AR '$@; - QUIET_LINK = @echo ' LINK '$@; - QUIET_MKDIR = @echo ' MKDIR '$@; - QUIET_GEN = @echo ' GEN '$@; + QUIET_CC = @echo ' CC '$@; + QUIET_CC_FPIC = @echo ' CC FPIC '$@; + QUIET_CLANG = @echo ' CLANG '$@; + QUIET_AR = @echo ' AR '$@; + QUIET_LINK = @echo ' LINK '$@; + QUIET_MKDIR = @echo ' MKDIR '$@; + QUIET_GEN = @echo ' GEN '$@; QUIET_SUBDIR0 = +@subdir= QUIET_SUBDIR1 = ;$(NO_SUBDIR) \ - echo ' SUBDIR '$$subdir; \ + echo ' SUBDIR '$$subdir; \ $(MAKE) $(PRINT_DIR) -C $$subdir - QUIET_FLEX = @echo ' FLEX '$@; - QUIET_BISON = @echo ' BISON '$@; - QUIET_GENSKEL = @echo ' GEN-SKEL '$@; + QUIET_FLEX = @echo ' FLEX '$@; + QUIET_BISON = @echo ' BISON '$@; + QUIET_GENSKEL = @echo ' GENSKEL '$@; descend = \ - +@echo ' DESCEND '$(1); \ + +@echo ' DESCEND '$(1); \ mkdir -p $(OUTPUT)$(1) && \ $(MAKE) $(COMMAND_O) subdir=$(if $(subdir),$(subdir)/$(1),$(1)) $(PRINT_DIR) -C $(1) $(2) - QUIET_CLEAN = @printf ' CLEAN %s\n' $1; - QUIET_INSTALL = @printf ' INSTALL %s\n' $1; - QUIET_UNINST = @printf ' UNINST %s\n' $1; + QUIET_CLEAN = @printf ' CLEAN %s\n' $1; + QUIET_INSTALL = @printf ' INSTALL %s\n' $1; + QUIET_UNINST = @printf ' UNINST %s\n' $1; endif endif diff --git a/tools/testing/nvdimm/test/iomap.c b/tools/testing/nvdimm/test/iomap.c index c62d372d426f..ed563bdd88f3 100644 --- a/tools/testing/nvdimm/test/iomap.c +++ b/tools/testing/nvdimm/test/iomap.c @@ -62,7 +62,7 @@ struct nfit_test_resource *get_nfit_res(resource_size_t resource) } EXPORT_SYMBOL(get_nfit_res); -void __iomem *__nfit_test_ioremap(resource_size_t offset, unsigned long size, +static void __iomem *__nfit_test_ioremap(resource_size_t offset, unsigned long size, void __iomem *(*fallback_fn)(resource_size_t, unsigned long)) { struct nfit_test_resource *nfit_res = get_nfit_res(offset); diff --git a/tools/testing/nvdimm/test/nfit.c b/tools/testing/nvdimm/test/nfit.c index 9b185bf82da8..54f367cbadae 100644 --- a/tools/testing/nvdimm/test/nfit.c +++ b/tools/testing/nvdimm/test/nfit.c @@ -1871,9 +1871,16 @@ static void smart_init(struct nfit_test *t) } } +static size_t sizeof_spa(struct acpi_nfit_system_address *spa) +{ + /* until spa location cookie support is added... */ + return sizeof(*spa) - 8; +} + static int nfit_test0_alloc(struct nfit_test *t) { - size_t nfit_size = sizeof(struct acpi_nfit_system_address) * NUM_SPA + struct acpi_nfit_system_address *spa = NULL; + size_t nfit_size = sizeof_spa(spa) * NUM_SPA + sizeof(struct acpi_nfit_memory_map) * NUM_MEM + sizeof(struct acpi_nfit_control_region) * NUM_DCR + offsetof(struct acpi_nfit_control_region, @@ -1937,7 +1944,8 @@ static int nfit_test0_alloc(struct nfit_test *t) static int nfit_test1_alloc(struct nfit_test *t) { - size_t nfit_size = sizeof(struct acpi_nfit_system_address) * 2 + struct acpi_nfit_system_address *spa = NULL; + size_t nfit_size = sizeof_spa(spa) * 2 + sizeof(struct acpi_nfit_memory_map) * 2 + offsetof(struct acpi_nfit_control_region, window_size) * 2; int i; @@ -2000,7 +2008,7 @@ static void nfit_test0_setup(struct nfit_test *t) */ spa = nfit_buf; spa->header.type = ACPI_NFIT_TYPE_SYSTEM_ADDRESS; - spa->header.length = sizeof(*spa); + spa->header.length = sizeof_spa(spa); memcpy(spa->range_guid, to_nfit_uuid(NFIT_SPA_PM), 16); spa->range_index = 0+1; spa->address = t->spa_set_dma[0]; @@ -2014,7 +2022,7 @@ static void nfit_test0_setup(struct nfit_test *t) */ spa = nfit_buf + offset; spa->header.type = ACPI_NFIT_TYPE_SYSTEM_ADDRESS; - spa->header.length = sizeof(*spa); + spa->header.length = sizeof_spa(spa); memcpy(spa->range_guid, to_nfit_uuid(NFIT_SPA_PM), 16); spa->range_index = 1+1; spa->address = t->spa_set_dma[1]; @@ -2024,7 +2032,7 @@ static void nfit_test0_setup(struct nfit_test *t) /* spa2 (dcr0) dimm0 */ spa = nfit_buf + offset; spa->header.type = ACPI_NFIT_TYPE_SYSTEM_ADDRESS; - spa->header.length = sizeof(*spa); + spa->header.length = sizeof_spa(spa); memcpy(spa->range_guid, to_nfit_uuid(NFIT_SPA_DCR), 16); spa->range_index = 2+1; spa->address = t->dcr_dma[0]; @@ -2034,7 +2042,7 @@ static void nfit_test0_setup(struct nfit_test *t) /* spa3 (dcr1) dimm1 */ spa = nfit_buf + offset; spa->header.type = ACPI_NFIT_TYPE_SYSTEM_ADDRESS; - spa->header.length = sizeof(*spa); + spa->header.length = sizeof_spa(spa); memcpy(spa->range_guid, to_nfit_uuid(NFIT_SPA_DCR), 16); spa->range_index = 3+1; spa->address = t->dcr_dma[1]; @@ -2044,7 +2052,7 @@ static void nfit_test0_setup(struct nfit_test *t) /* spa4 (dcr2) dimm2 */ spa = nfit_buf + offset; spa->header.type = ACPI_NFIT_TYPE_SYSTEM_ADDRESS; - spa->header.length = sizeof(*spa); + spa->header.length = sizeof_spa(spa); memcpy(spa->range_guid, to_nfit_uuid(NFIT_SPA_DCR), 16); spa->range_index = 4+1; spa->address = t->dcr_dma[2]; @@ -2054,7 +2062,7 @@ static void nfit_test0_setup(struct nfit_test *t) /* spa5 (dcr3) dimm3 */ spa = nfit_buf + offset; spa->header.type = ACPI_NFIT_TYPE_SYSTEM_ADDRESS; - spa->header.length = sizeof(*spa); + spa->header.length = sizeof_spa(spa); memcpy(spa->range_guid, to_nfit_uuid(NFIT_SPA_DCR), 16); spa->range_index = 5+1; spa->address = t->dcr_dma[3]; @@ -2064,7 +2072,7 @@ static void nfit_test0_setup(struct nfit_test *t) /* spa6 (bdw for dcr0) dimm0 */ spa = nfit_buf + offset; spa->header.type = ACPI_NFIT_TYPE_SYSTEM_ADDRESS; - spa->header.length = sizeof(*spa); + spa->header.length = sizeof_spa(spa); memcpy(spa->range_guid, to_nfit_uuid(NFIT_SPA_BDW), 16); spa->range_index = 6+1; spa->address = t->dimm_dma[0]; @@ -2074,7 +2082,7 @@ static void nfit_test0_setup(struct nfit_test *t) /* spa7 (bdw for dcr1) dimm1 */ spa = nfit_buf + offset; spa->header.type = ACPI_NFIT_TYPE_SYSTEM_ADDRESS; - spa->header.length = sizeof(*spa); + spa->header.length = sizeof_spa(spa); memcpy(spa->range_guid, to_nfit_uuid(NFIT_SPA_BDW), 16); spa->range_index = 7+1; spa->address = t->dimm_dma[1]; @@ -2084,7 +2092,7 @@ static void nfit_test0_setup(struct nfit_test *t) /* spa8 (bdw for dcr2) dimm2 */ spa = nfit_buf + offset; spa->header.type = ACPI_NFIT_TYPE_SYSTEM_ADDRESS; - spa->header.length = sizeof(*spa); + spa->header.length = sizeof_spa(spa); memcpy(spa->range_guid, to_nfit_uuid(NFIT_SPA_BDW), 16); spa->range_index = 8+1; spa->address = t->dimm_dma[2]; @@ -2094,7 +2102,7 @@ static void nfit_test0_setup(struct nfit_test *t) /* spa9 (bdw for dcr3) dimm3 */ spa = nfit_buf + offset; spa->header.type = ACPI_NFIT_TYPE_SYSTEM_ADDRESS; - spa->header.length = sizeof(*spa); + spa->header.length = sizeof_spa(spa); memcpy(spa->range_guid, to_nfit_uuid(NFIT_SPA_BDW), 16); spa->range_index = 9+1; spa->address = t->dimm_dma[3]; @@ -2581,7 +2589,7 @@ static void nfit_test0_setup(struct nfit_test *t) /* spa10 (dcr4) dimm4 */ spa = nfit_buf + offset; spa->header.type = ACPI_NFIT_TYPE_SYSTEM_ADDRESS; - spa->header.length = sizeof(*spa); + spa->header.length = sizeof_spa(spa); memcpy(spa->range_guid, to_nfit_uuid(NFIT_SPA_DCR), 16); spa->range_index = 10+1; spa->address = t->dcr_dma[4]; @@ -2595,7 +2603,7 @@ static void nfit_test0_setup(struct nfit_test *t) */ spa = nfit_buf + offset; spa->header.type = ACPI_NFIT_TYPE_SYSTEM_ADDRESS; - spa->header.length = sizeof(*spa); + spa->header.length = sizeof_spa(spa); memcpy(spa->range_guid, to_nfit_uuid(NFIT_SPA_PM), 16); spa->range_index = 11+1; spa->address = t->spa_set_dma[2]; @@ -2605,7 +2613,7 @@ static void nfit_test0_setup(struct nfit_test *t) /* spa12 (bdw for dcr4) dimm4 */ spa = nfit_buf + offset; spa->header.type = ACPI_NFIT_TYPE_SYSTEM_ADDRESS; - spa->header.length = sizeof(*spa); + spa->header.length = sizeof_spa(spa); memcpy(spa->range_guid, to_nfit_uuid(NFIT_SPA_BDW), 16); spa->range_index = 12+1; spa->address = t->dimm_dma[4]; @@ -2739,7 +2747,7 @@ static void nfit_test1_setup(struct nfit_test *t) /* spa0 (flat range with no bdw aliasing) */ spa = nfit_buf + offset; spa->header.type = ACPI_NFIT_TYPE_SYSTEM_ADDRESS; - spa->header.length = sizeof(*spa); + spa->header.length = sizeof_spa(spa); memcpy(spa->range_guid, to_nfit_uuid(NFIT_SPA_PM), 16); spa->range_index = 0+1; spa->address = t->spa_set_dma[0]; @@ -2749,7 +2757,7 @@ static void nfit_test1_setup(struct nfit_test *t) /* virtual cd region */ spa = nfit_buf + offset; spa->header.type = ACPI_NFIT_TYPE_SYSTEM_ADDRESS; - spa->header.length = sizeof(*spa); + spa->header.length = sizeof_spa(spa); memcpy(spa->range_guid, to_nfit_uuid(NFIT_SPA_VCD), 16); spa->range_index = 0; spa->address = t->spa_set_dma[1]; diff --git a/tools/testing/selftests/arm64/bti/test.c b/tools/testing/selftests/arm64/bti/test.c index 656b04976ccc..67b77ab83c20 100644 --- a/tools/testing/selftests/arm64/bti/test.c +++ b/tools/testing/selftests/arm64/bti/test.c @@ -6,6 +6,7 @@ #include "system.h" +#include <stddef.h> #include <linux/errno.h> #include <linux/auxvec.h> #include <linux/signal.h> diff --git a/tools/testing/selftests/bpf/.gitignore b/tools/testing/selftests/bpf/.gitignore index 4866f6a21901..addcfd8b615e 100644 --- a/tools/testing/selftests/bpf/.gitignore +++ b/tools/testing/selftests/bpf/.gitignore @@ -10,6 +10,7 @@ FEATURE-DUMP.libbpf fixdep test_dev_cgroup /test_progs* +!test_progs.h test_verifier_log feature test_sock @@ -30,10 +31,13 @@ test_sysctl xdping test_cpp *.skel.h +*.lskel.h /no_alu32 /bpf_gcc /tools /runqslower /bench *.ko +*.tmp xdpxceiver +xdp_redirect_multi diff --git a/tools/testing/selftests/bpf/Makefile b/tools/testing/selftests/bpf/Makefile index 511259c2c6c5..f405b20c1e6c 100644 --- a/tools/testing/selftests/bpf/Makefile +++ b/tools/testing/selftests/bpf/Makefile @@ -54,6 +54,7 @@ TEST_FILES = xsk_prereqs.sh \ # Order correspond to 'make run_tests' order TEST_PROGS := test_kmod.sh \ test_xdp_redirect.sh \ + test_xdp_redirect_multi.sh \ test_xdp_meta.sh \ test_xdp_veth.sh \ test_offload.py \ @@ -84,7 +85,7 @@ TEST_PROGS_EXTENDED := with_addr.sh \ TEST_GEN_PROGS_EXTENDED = test_sock_addr test_skb_cgroup_id_user \ flow_dissector_load test_flow_dissector test_tcp_check_syncookie_user \ test_lirc_mode2_user xdping test_cpp runqslower bench bpf_testmod.ko \ - xdpxceiver + xdpxceiver xdp_redirect_multi TEST_CUSTOM_PROGS = $(OUTPUT)/urandom_read @@ -312,6 +313,10 @@ SKEL_BLACKLIST := btf__% test_pinning_invalid.c test_sk_assign.c LINKED_SKELS := test_static_linked.skel.h linked_funcs.skel.h \ linked_vars.skel.h linked_maps.skel.h +LSKELS := kfunc_call_test.c fentry_test.c fexit_test.c fexit_sleep.c \ + test_ksyms_module.c test_ringbuf.c atomics.c trace_printk.c +SKEL_BLACKLIST += $$(LSKELS) + test_static_linked.skel.h-deps := test_static_linked1.o test_static_linked2.o linked_funcs.skel.h-deps := linked_funcs1.o linked_funcs2.o linked_vars.skel.h-deps := linked_vars1.o linked_vars2.o @@ -339,6 +344,7 @@ TRUNNER_BPF_OBJS := $$(patsubst %.c,$$(TRUNNER_OUTPUT)/%.o, $$(TRUNNER_BPF_SRCS) TRUNNER_BPF_SKELS := $$(patsubst %.c,$$(TRUNNER_OUTPUT)/%.skel.h, \ $$(filter-out $(SKEL_BLACKLIST) $(LINKED_BPF_SRCS),\ $$(TRUNNER_BPF_SRCS))) +TRUNNER_BPF_LSKELS := $$(patsubst %.c,$$(TRUNNER_OUTPUT)/%.lskel.h, $$(LSKELS)) TRUNNER_BPF_SKELS_LINKED := $$(addprefix $$(TRUNNER_OUTPUT)/,$(LINKED_SKELS)) TEST_GEN_FILES += $$(TRUNNER_BPF_OBJS) @@ -380,6 +386,14 @@ $(TRUNNER_BPF_SKELS): %.skel.h: %.o $(BPFTOOL) | $(TRUNNER_OUTPUT) $(Q)diff $$(<:.o=.linked2.o) $$(<:.o=.linked3.o) $(Q)$$(BPFTOOL) gen skeleton $$(<:.o=.linked3.o) name $$(notdir $$(<:.o=)) > $$@ +$(TRUNNER_BPF_LSKELS): %.lskel.h: %.o $(BPFTOOL) | $(TRUNNER_OUTPUT) + $$(call msg,GEN-SKEL,$(TRUNNER_BINARY),$$@) + $(Q)$$(BPFTOOL) gen object $$(<:.o=.linked1.o) $$< + $(Q)$$(BPFTOOL) gen object $$(<:.o=.linked2.o) $$(<:.o=.linked1.o) + $(Q)$$(BPFTOOL) gen object $$(<:.o=.linked3.o) $$(<:.o=.linked2.o) + $(Q)diff $$(<:.o=.linked2.o) $$(<:.o=.linked3.o) + $(Q)$$(BPFTOOL) gen skeleton -L $$(<:.o=.linked3.o) name $$(notdir $$(<:.o=)) > $$@ + $(TRUNNER_BPF_SKELS_LINKED): $(TRUNNER_BPF_OBJS) $(BPFTOOL) | $(TRUNNER_OUTPUT) $$(call msg,LINK-BPF,$(TRUNNER_BINARY),$$(@:.skel.h=.o)) $(Q)$$(BPFTOOL) gen object $$(@:.skel.h=.linked1.o) $$(addprefix $(TRUNNER_OUTPUT)/,$$($$(@F)-deps)) @@ -409,6 +423,7 @@ $(TRUNNER_TEST_OBJS): $(TRUNNER_OUTPUT)/%.test.o: \ $(TRUNNER_EXTRA_HDRS) \ $(TRUNNER_BPF_OBJS) \ $(TRUNNER_BPF_SKELS) \ + $(TRUNNER_BPF_LSKELS) \ $(TRUNNER_BPF_SKELS_LINKED) \ $$(BPFOBJ) | $(TRUNNER_OUTPUT) $$(call msg,TEST-OBJ,$(TRUNNER_BINARY),$$@) @@ -516,6 +531,6 @@ $(OUTPUT)/bench: $(OUTPUT)/bench.o $(OUTPUT)/testing_helpers.o \ EXTRA_CLEAN := $(TEST_CUSTOM_PROGS) $(SCRATCH_DIR) $(HOST_SCRATCH_DIR) \ prog_tests/tests.h map_tests/tests.h verifier/tests.h \ feature \ - $(addprefix $(OUTPUT)/,*.o *.skel.h no_alu32 bpf_gcc bpf_testmod.ko) + $(addprefix $(OUTPUT)/,*.o *.skel.h *.lskel.h no_alu32 bpf_gcc bpf_testmod.ko) .PHONY: docs docs-clean diff --git a/tools/testing/selftests/bpf/Makefile.docs b/tools/testing/selftests/bpf/Makefile.docs index ccf260021e83..eb6a4fea8c79 100644 --- a/tools/testing/selftests/bpf/Makefile.docs +++ b/tools/testing/selftests/bpf/Makefile.docs @@ -52,7 +52,8 @@ $(OUTPUT)%.$2: $(OUTPUT)%.rst ifndef RST2MAN_DEP $$(error "rst2man not found, but required to generate man pages") endif - $$(QUIET_GEN)rst2man $$< > $$@ + $$(QUIET_GEN)rst2man --exit-status=1 $$< > $$@.tmp + $$(QUIET_GEN)mv $$@.tmp $$@ docs-clean-$1: $$(call QUIET_CLEAN, eBPF_$1-manpage) diff --git a/tools/testing/selftests/bpf/README.rst b/tools/testing/selftests/bpf/README.rst index 3353778c30f8..8deec1ca9150 100644 --- a/tools/testing/selftests/bpf/README.rst +++ b/tools/testing/selftests/bpf/README.rst @@ -202,3 +202,22 @@ generate valid BTF information for weak variables. Please make sure you use Clang that contains the fix. __ https://reviews.llvm.org/D100362 + +Clang relocation changes +======================== + +Clang 13 patch `clang reloc patch`_ made some changes on relocations such +that existing relocation types are broken into more types and +each new type corresponds to only one way to resolve relocation. +See `kernel llvm reloc`_ for more explanation and some examples. +Using clang 13 to compile old libbpf which has static linker support, +there will be a compilation failure:: + + libbpf: ELF relo #0 in section #6 has unexpected type 2 in .../bpf_tcp_nogpl.o + +Here, ``type 2`` refers to new relocation type ``R_BPF_64_ABS64``. +To fix this issue, user newer libbpf. + +.. Links +.. _clang reloc patch: https://reviews.llvm.org/D102712 +.. _kernel llvm reloc: /Documentation/bpf/llvm_reloc.rst diff --git a/tools/testing/selftests/bpf/bench.c b/tools/testing/selftests/bpf/bench.c index 332ed2f7b402..6ea15b93a2f8 100644 --- a/tools/testing/selftests/bpf/bench.c +++ b/tools/testing/selftests/bpf/bench.c @@ -43,6 +43,7 @@ void setup_libbpf() { int err; + libbpf_set_strict_mode(LIBBPF_STRICT_ALL); libbpf_set_print(libbpf_print_fn); err = bump_memlock_rlimit(); diff --git a/tools/testing/selftests/bpf/benchs/bench_rename.c b/tools/testing/selftests/bpf/benchs/bench_rename.c index a967674098ad..c7ec114eca56 100644 --- a/tools/testing/selftests/bpf/benchs/bench_rename.c +++ b/tools/testing/selftests/bpf/benchs/bench_rename.c @@ -65,7 +65,7 @@ static void attach_bpf(struct bpf_program *prog) struct bpf_link *link; link = bpf_program__attach(prog); - if (IS_ERR(link)) { + if (!link) { fprintf(stderr, "failed to attach program!\n"); exit(1); } diff --git a/tools/testing/selftests/bpf/benchs/bench_ringbufs.c b/tools/testing/selftests/bpf/benchs/bench_ringbufs.c index bde6c9d4cbd4..d167bffac679 100644 --- a/tools/testing/selftests/bpf/benchs/bench_ringbufs.c +++ b/tools/testing/selftests/bpf/benchs/bench_ringbufs.c @@ -181,7 +181,7 @@ static void ringbuf_libbpf_setup() } link = bpf_program__attach(ctx->skel->progs.bench_ringbuf); - if (IS_ERR(link)) { + if (!link) { fprintf(stderr, "failed to attach program!\n"); exit(1); } @@ -271,7 +271,7 @@ static void ringbuf_custom_setup() } link = bpf_program__attach(ctx->skel->progs.bench_ringbuf); - if (IS_ERR(link)) { + if (!link) { fprintf(stderr, "failed to attach program\n"); exit(1); } @@ -430,7 +430,7 @@ static void perfbuf_libbpf_setup() } link = bpf_program__attach(ctx->skel->progs.bench_perfbuf); - if (IS_ERR(link)) { + if (!link) { fprintf(stderr, "failed to attach program\n"); exit(1); } diff --git a/tools/testing/selftests/bpf/benchs/bench_trigger.c b/tools/testing/selftests/bpf/benchs/bench_trigger.c index 2a0b6c9885a4..f41a491a8cc0 100644 --- a/tools/testing/selftests/bpf/benchs/bench_trigger.c +++ b/tools/testing/selftests/bpf/benchs/bench_trigger.c @@ -60,7 +60,7 @@ static void attach_bpf(struct bpf_program *prog) struct bpf_link *link; link = bpf_program__attach(prog); - if (IS_ERR(link)) { + if (!link) { fprintf(stderr, "failed to attach program!\n"); exit(1); } diff --git a/tools/testing/selftests/bpf/network_helpers.c b/tools/testing/selftests/bpf/network_helpers.c index 12ee40284da0..2060bc122c53 100644 --- a/tools/testing/selftests/bpf/network_helpers.c +++ b/tools/testing/selftests/bpf/network_helpers.c @@ -40,7 +40,7 @@ struct ipv6_packet pkt_v6 = { .tcp.doff = 5, }; -static int settimeo(int fd, int timeout_ms) +int settimeo(int fd, int timeout_ms) { struct timeval timeout = { .tv_sec = 3 }; diff --git a/tools/testing/selftests/bpf/network_helpers.h b/tools/testing/selftests/bpf/network_helpers.h index 7205f8afdba1..5e0d51c07b63 100644 --- a/tools/testing/selftests/bpf/network_helpers.h +++ b/tools/testing/selftests/bpf/network_helpers.h @@ -33,6 +33,7 @@ struct ipv6_packet { } __packed; extern struct ipv6_packet pkt_v6; +int settimeo(int fd, int timeout_ms); int start_server(int family, int type, const char *addr, __u16 port, int timeout_ms); int connect_to_fd(int server_fd, int timeout_ms); diff --git a/tools/testing/selftests/bpf/prog_tests/atomics.c b/tools/testing/selftests/bpf/prog_tests/atomics.c index 21efe7bbf10d..ba0e1efe5a45 100644 --- a/tools/testing/selftests/bpf/prog_tests/atomics.c +++ b/tools/testing/selftests/bpf/prog_tests/atomics.c @@ -2,19 +2,19 @@ #include <test_progs.h> -#include "atomics.skel.h" +#include "atomics.lskel.h" static void test_add(struct atomics *skel) { int err, prog_fd; __u32 duration = 0, retval; - struct bpf_link *link; + int link_fd; - link = bpf_program__attach(skel->progs.add); - if (CHECK(IS_ERR(link), "attach(add)", "err: %ld\n", PTR_ERR(link))) + link_fd = atomics__add__attach(skel); + if (!ASSERT_GT(link_fd, 0, "attach(add)")) return; - prog_fd = bpf_program__fd(skel->progs.add); + prog_fd = skel->progs.add.prog_fd; err = bpf_prog_test_run(prog_fd, 1, NULL, 0, NULL, NULL, &retval, &duration); if (CHECK(err || retval, "test_run add", @@ -33,20 +33,20 @@ static void test_add(struct atomics *skel) ASSERT_EQ(skel->data->add_noreturn_value, 3, "add_noreturn_value"); cleanup: - bpf_link__destroy(link); + close(link_fd); } static void test_sub(struct atomics *skel) { int err, prog_fd; __u32 duration = 0, retval; - struct bpf_link *link; + int link_fd; - link = bpf_program__attach(skel->progs.sub); - if (CHECK(IS_ERR(link), "attach(sub)", "err: %ld\n", PTR_ERR(link))) + link_fd = atomics__sub__attach(skel); + if (!ASSERT_GT(link_fd, 0, "attach(sub)")) return; - prog_fd = bpf_program__fd(skel->progs.sub); + prog_fd = skel->progs.sub.prog_fd; err = bpf_prog_test_run(prog_fd, 1, NULL, 0, NULL, NULL, &retval, &duration); if (CHECK(err || retval, "test_run sub", @@ -66,20 +66,20 @@ static void test_sub(struct atomics *skel) ASSERT_EQ(skel->data->sub_noreturn_value, -1, "sub_noreturn_value"); cleanup: - bpf_link__destroy(link); + close(link_fd); } static void test_and(struct atomics *skel) { int err, prog_fd; __u32 duration = 0, retval; - struct bpf_link *link; + int link_fd; - link = bpf_program__attach(skel->progs.and); - if (CHECK(IS_ERR(link), "attach(and)", "err: %ld\n", PTR_ERR(link))) + link_fd = atomics__and__attach(skel); + if (!ASSERT_GT(link_fd, 0, "attach(and)")) return; - prog_fd = bpf_program__fd(skel->progs.and); + prog_fd = skel->progs.and.prog_fd; err = bpf_prog_test_run(prog_fd, 1, NULL, 0, NULL, NULL, &retval, &duration); if (CHECK(err || retval, "test_run and", @@ -94,20 +94,20 @@ static void test_and(struct atomics *skel) ASSERT_EQ(skel->data->and_noreturn_value, 0x010ull << 32, "and_noreturn_value"); cleanup: - bpf_link__destroy(link); + close(link_fd); } static void test_or(struct atomics *skel) { int err, prog_fd; __u32 duration = 0, retval; - struct bpf_link *link; + int link_fd; - link = bpf_program__attach(skel->progs.or); - if (CHECK(IS_ERR(link), "attach(or)", "err: %ld\n", PTR_ERR(link))) + link_fd = atomics__or__attach(skel); + if (!ASSERT_GT(link_fd, 0, "attach(or)")) return; - prog_fd = bpf_program__fd(skel->progs.or); + prog_fd = skel->progs.or.prog_fd; err = bpf_prog_test_run(prog_fd, 1, NULL, 0, NULL, NULL, &retval, &duration); if (CHECK(err || retval, "test_run or", @@ -123,20 +123,20 @@ static void test_or(struct atomics *skel) ASSERT_EQ(skel->data->or_noreturn_value, 0x111ull << 32, "or_noreturn_value"); cleanup: - bpf_link__destroy(link); + close(link_fd); } static void test_xor(struct atomics *skel) { int err, prog_fd; __u32 duration = 0, retval; - struct bpf_link *link; + int link_fd; - link = bpf_program__attach(skel->progs.xor); - if (CHECK(IS_ERR(link), "attach(xor)", "err: %ld\n", PTR_ERR(link))) + link_fd = atomics__xor__attach(skel); + if (!ASSERT_GT(link_fd, 0, "attach(xor)")) return; - prog_fd = bpf_program__fd(skel->progs.xor); + prog_fd = skel->progs.xor.prog_fd; err = bpf_prog_test_run(prog_fd, 1, NULL, 0, NULL, NULL, &retval, &duration); if (CHECK(err || retval, "test_run xor", @@ -151,20 +151,20 @@ static void test_xor(struct atomics *skel) ASSERT_EQ(skel->data->xor_noreturn_value, 0x101ull << 32, "xor_nxoreturn_value"); cleanup: - bpf_link__destroy(link); + close(link_fd); } static void test_cmpxchg(struct atomics *skel) { int err, prog_fd; __u32 duration = 0, retval; - struct bpf_link *link; + int link_fd; - link = bpf_program__attach(skel->progs.cmpxchg); - if (CHECK(IS_ERR(link), "attach(cmpxchg)", "err: %ld\n", PTR_ERR(link))) + link_fd = atomics__cmpxchg__attach(skel); + if (!ASSERT_GT(link_fd, 0, "attach(cmpxchg)")) return; - prog_fd = bpf_program__fd(skel->progs.cmpxchg); + prog_fd = skel->progs.cmpxchg.prog_fd; err = bpf_prog_test_run(prog_fd, 1, NULL, 0, NULL, NULL, &retval, &duration); if (CHECK(err || retval, "test_run add", @@ -180,20 +180,20 @@ static void test_cmpxchg(struct atomics *skel) ASSERT_EQ(skel->bss->cmpxchg32_result_succeed, 1, "cmpxchg_result_succeed"); cleanup: - bpf_link__destroy(link); + close(link_fd); } static void test_xchg(struct atomics *skel) { int err, prog_fd; __u32 duration = 0, retval; - struct bpf_link *link; + int link_fd; - link = bpf_program__attach(skel->progs.xchg); - if (CHECK(IS_ERR(link), "attach(xchg)", "err: %ld\n", PTR_ERR(link))) + link_fd = atomics__xchg__attach(skel); + if (!ASSERT_GT(link_fd, 0, "attach(xchg)")) return; - prog_fd = bpf_program__fd(skel->progs.xchg); + prog_fd = skel->progs.xchg.prog_fd; err = bpf_prog_test_run(prog_fd, 1, NULL, 0, NULL, NULL, &retval, &duration); if (CHECK(err || retval, "test_run add", @@ -207,7 +207,7 @@ static void test_xchg(struct atomics *skel) ASSERT_EQ(skel->bss->xchg32_result, 1, "xchg32_result"); cleanup: - bpf_link__destroy(link); + close(link_fd); } void test_atomics(void) diff --git a/tools/testing/selftests/bpf/prog_tests/attach_probe.c b/tools/testing/selftests/bpf/prog_tests/attach_probe.c index 9dc4e3dfbcf3..ec11e20d2b92 100644 --- a/tools/testing/selftests/bpf/prog_tests/attach_probe.c +++ b/tools/testing/selftests/bpf/prog_tests/attach_probe.c @@ -85,16 +85,14 @@ void test_attach_probe(void) kprobe_link = bpf_program__attach_kprobe(skel->progs.handle_kprobe, false /* retprobe */, SYS_NANOSLEEP_KPROBE_NAME); - if (CHECK(IS_ERR(kprobe_link), "attach_kprobe", - "err %ld\n", PTR_ERR(kprobe_link))) + if (!ASSERT_OK_PTR(kprobe_link, "attach_kprobe")) goto cleanup; skel->links.handle_kprobe = kprobe_link; kretprobe_link = bpf_program__attach_kprobe(skel->progs.handle_kretprobe, true /* retprobe */, SYS_NANOSLEEP_KPROBE_NAME); - if (CHECK(IS_ERR(kretprobe_link), "attach_kretprobe", - "err %ld\n", PTR_ERR(kretprobe_link))) + if (!ASSERT_OK_PTR(kretprobe_link, "attach_kretprobe")) goto cleanup; skel->links.handle_kretprobe = kretprobe_link; @@ -103,8 +101,7 @@ void test_attach_probe(void) 0 /* self pid */, "/proc/self/exe", uprobe_offset); - if (CHECK(IS_ERR(uprobe_link), "attach_uprobe", - "err %ld\n", PTR_ERR(uprobe_link))) + if (!ASSERT_OK_PTR(uprobe_link, "attach_uprobe")) goto cleanup; skel->links.handle_uprobe = uprobe_link; @@ -113,8 +110,7 @@ void test_attach_probe(void) -1 /* any pid */, "/proc/self/exe", uprobe_offset); - if (CHECK(IS_ERR(uretprobe_link), "attach_uretprobe", - "err %ld\n", PTR_ERR(uretprobe_link))) + if (!ASSERT_OK_PTR(uretprobe_link, "attach_uretprobe")) goto cleanup; skel->links.handle_uretprobe = uretprobe_link; diff --git a/tools/testing/selftests/bpf/prog_tests/bpf_iter.c b/tools/testing/selftests/bpf/prog_tests/bpf_iter.c index 2d3590cfb5e1..1f1aade56504 100644 --- a/tools/testing/selftests/bpf/prog_tests/bpf_iter.c +++ b/tools/testing/selftests/bpf/prog_tests/bpf_iter.c @@ -47,7 +47,7 @@ static void do_dummy_read(struct bpf_program *prog) int iter_fd, len; link = bpf_program__attach_iter(prog, NULL); - if (CHECK(IS_ERR(link), "attach_iter", "attach_iter failed\n")) + if (!ASSERT_OK_PTR(link, "attach_iter")) return; iter_fd = bpf_iter_create(bpf_link__fd(link)); @@ -201,7 +201,7 @@ static int do_btf_read(struct bpf_iter_task_btf *skel) int ret = 0; link = bpf_program__attach_iter(prog, NULL); - if (CHECK(IS_ERR(link), "attach_iter", "attach_iter failed\n")) + if (!ASSERT_OK_PTR(link, "attach_iter")) return ret; iter_fd = bpf_iter_create(bpf_link__fd(link)); @@ -396,7 +396,7 @@ static void test_file_iter(void) return; link = bpf_program__attach_iter(skel1->progs.dump_task, NULL); - if (CHECK(IS_ERR(link), "attach_iter", "attach_iter failed\n")) + if (!ASSERT_OK_PTR(link, "attach_iter")) goto out; /* unlink this path if it exists. */ @@ -502,7 +502,7 @@ static void test_overflow(bool test_e2big_overflow, bool ret1) skel->bss->map2_id = map_info.id; link = bpf_program__attach_iter(skel->progs.dump_bpf_map, NULL); - if (CHECK(IS_ERR(link), "attach_iter", "attach_iter failed\n")) + if (!ASSERT_OK_PTR(link, "attach_iter")) goto free_map2; iter_fd = bpf_iter_create(bpf_link__fd(link)); @@ -607,14 +607,12 @@ static void test_bpf_hash_map(void) opts.link_info = &linfo; opts.link_info_len = sizeof(linfo); link = bpf_program__attach_iter(skel->progs.dump_bpf_hash_map, &opts); - if (CHECK(!IS_ERR(link), "attach_iter", - "attach_iter for hashmap2 unexpected succeeded\n")) + if (!ASSERT_ERR_PTR(link, "attach_iter")) goto out; linfo.map.map_fd = bpf_map__fd(skel->maps.hashmap3); link = bpf_program__attach_iter(skel->progs.dump_bpf_hash_map, &opts); - if (CHECK(!IS_ERR(link), "attach_iter", - "attach_iter for hashmap3 unexpected succeeded\n")) + if (!ASSERT_ERR_PTR(link, "attach_iter")) goto out; /* hashmap1 should be good, update map values here */ @@ -636,7 +634,7 @@ static void test_bpf_hash_map(void) linfo.map.map_fd = map_fd; link = bpf_program__attach_iter(skel->progs.dump_bpf_hash_map, &opts); - if (CHECK(IS_ERR(link), "attach_iter", "attach_iter failed\n")) + if (!ASSERT_OK_PTR(link, "attach_iter")) goto out; iter_fd = bpf_iter_create(bpf_link__fd(link)); @@ -727,7 +725,7 @@ static void test_bpf_percpu_hash_map(void) opts.link_info = &linfo; opts.link_info_len = sizeof(linfo); link = bpf_program__attach_iter(skel->progs.dump_bpf_percpu_hash_map, &opts); - if (CHECK(IS_ERR(link), "attach_iter", "attach_iter failed\n")) + if (!ASSERT_OK_PTR(link, "attach_iter")) goto out; iter_fd = bpf_iter_create(bpf_link__fd(link)); @@ -798,7 +796,7 @@ static void test_bpf_array_map(void) opts.link_info = &linfo; opts.link_info_len = sizeof(linfo); link = bpf_program__attach_iter(skel->progs.dump_bpf_array_map, &opts); - if (CHECK(IS_ERR(link), "attach_iter", "attach_iter failed\n")) + if (!ASSERT_OK_PTR(link, "attach_iter")) goto out; iter_fd = bpf_iter_create(bpf_link__fd(link)); @@ -894,7 +892,7 @@ static void test_bpf_percpu_array_map(void) opts.link_info = &linfo; opts.link_info_len = sizeof(linfo); link = bpf_program__attach_iter(skel->progs.dump_bpf_percpu_array_map, &opts); - if (CHECK(IS_ERR(link), "attach_iter", "attach_iter failed\n")) + if (!ASSERT_OK_PTR(link, "attach_iter")) goto out; iter_fd = bpf_iter_create(bpf_link__fd(link)); @@ -957,7 +955,7 @@ static void test_bpf_sk_storage_delete(void) opts.link_info_len = sizeof(linfo); link = bpf_program__attach_iter(skel->progs.delete_bpf_sk_storage_map, &opts); - if (CHECK(IS_ERR(link), "attach_iter", "attach_iter failed\n")) + if (!ASSERT_OK_PTR(link, "attach_iter")) goto out; iter_fd = bpf_iter_create(bpf_link__fd(link)); @@ -1075,7 +1073,7 @@ static void test_bpf_sk_storage_map(void) opts.link_info = &linfo; opts.link_info_len = sizeof(linfo); link = bpf_program__attach_iter(skel->progs.dump_bpf_sk_storage_map, &opts); - if (CHECK(IS_ERR(link), "attach_iter", "attach_iter failed\n")) + if (!ASSERT_OK_PTR(link, "attach_iter")) goto out; iter_fd = bpf_iter_create(bpf_link__fd(link)); @@ -1128,7 +1126,7 @@ static void test_rdonly_buf_out_of_bound(void) opts.link_info = &linfo; opts.link_info_len = sizeof(linfo); link = bpf_program__attach_iter(skel->progs.dump_bpf_hash_map, &opts); - if (CHECK(!IS_ERR(link), "attach_iter", "unexpected success\n")) + if (!ASSERT_ERR_PTR(link, "attach_iter")) bpf_link__destroy(link); bpf_iter_test_kern5__destroy(skel); @@ -1186,8 +1184,7 @@ static void test_task_vma(void) skel->links.proc_maps = bpf_program__attach_iter( skel->progs.proc_maps, NULL); - if (CHECK(IS_ERR(skel->links.proc_maps), "bpf_program__attach_iter", - "attach iterator failed\n")) { + if (!ASSERT_OK_PTR(skel->links.proc_maps, "bpf_program__attach_iter")) { skel->links.proc_maps = NULL; goto out; } diff --git a/tools/testing/selftests/bpf/prog_tests/bpf_tcp_ca.c b/tools/testing/selftests/bpf/prog_tests/bpf_tcp_ca.c index e25917f04602..efe1e979affb 100644 --- a/tools/testing/selftests/bpf/prog_tests/bpf_tcp_ca.c +++ b/tools/testing/selftests/bpf/prog_tests/bpf_tcp_ca.c @@ -82,7 +82,7 @@ static void *server(void *arg) bytes, total_bytes, nr_sent, errno); done: - if (fd != -1) + if (fd >= 0) close(fd); if (err) { WRITE_ONCE(stop, 1); @@ -191,8 +191,7 @@ static void test_cubic(void) return; link = bpf_map__attach_struct_ops(cubic_skel->maps.cubic); - if (CHECK(IS_ERR(link), "bpf_map__attach_struct_ops", "err:%ld\n", - PTR_ERR(link))) { + if (!ASSERT_OK_PTR(link, "bpf_map__attach_struct_ops")) { bpf_cubic__destroy(cubic_skel); return; } @@ -213,8 +212,7 @@ static void test_dctcp(void) return; link = bpf_map__attach_struct_ops(dctcp_skel->maps.dctcp); - if (CHECK(IS_ERR(link), "bpf_map__attach_struct_ops", "err:%ld\n", - PTR_ERR(link))) { + if (!ASSERT_OK_PTR(link, "bpf_map__attach_struct_ops")) { bpf_dctcp__destroy(dctcp_skel); return; } diff --git a/tools/testing/selftests/bpf/prog_tests/btf.c b/tools/testing/selftests/bpf/prog_tests/btf.c index 0457ae32b270..857e3f26086f 100644 --- a/tools/testing/selftests/bpf/prog_tests/btf.c +++ b/tools/testing/selftests/bpf/prog_tests/btf.c @@ -3811,7 +3811,7 @@ static void do_test_raw(unsigned int test_num) always_log); free(raw_btf); - err = ((btf_fd == -1) != test->btf_load_err); + err = ((btf_fd < 0) != test->btf_load_err); if (CHECK(err, "btf_fd:%d test->btf_load_err:%u", btf_fd, test->btf_load_err) || CHECK(test->err_str && !strstr(btf_log_buf, test->err_str), @@ -3820,7 +3820,7 @@ static void do_test_raw(unsigned int test_num) goto done; } - if (err || btf_fd == -1) + if (err || btf_fd < 0) goto done; create_attr.name = test->map_name; @@ -3834,16 +3834,16 @@ static void do_test_raw(unsigned int test_num) map_fd = bpf_create_map_xattr(&create_attr); - err = ((map_fd == -1) != test->map_create_err); + err = ((map_fd < 0) != test->map_create_err); CHECK(err, "map_fd:%d test->map_create_err:%u", map_fd, test->map_create_err); done: if (*btf_log_buf && (err || always_log)) fprintf(stderr, "\n%s", btf_log_buf); - if (btf_fd != -1) + if (btf_fd >= 0) close(btf_fd); - if (map_fd != -1) + if (map_fd >= 0) close(map_fd); } @@ -3941,7 +3941,7 @@ static int test_big_btf_info(unsigned int test_num) btf_fd = bpf_load_btf(raw_btf, raw_btf_size, btf_log_buf, BTF_LOG_BUF_SIZE, always_log); - if (CHECK(btf_fd == -1, "errno:%d", errno)) { + if (CHECK(btf_fd < 0, "errno:%d", errno)) { err = -1; goto done; } @@ -3987,7 +3987,7 @@ done: free(raw_btf); free(user_btf); - if (btf_fd != -1) + if (btf_fd >= 0) close(btf_fd); return err; @@ -4029,7 +4029,7 @@ static int test_btf_id(unsigned int test_num) btf_fd[0] = bpf_load_btf(raw_btf, raw_btf_size, btf_log_buf, BTF_LOG_BUF_SIZE, always_log); - if (CHECK(btf_fd[0] == -1, "errno:%d", errno)) { + if (CHECK(btf_fd[0] < 0, "errno:%d", errno)) { err = -1; goto done; } @@ -4043,7 +4043,7 @@ static int test_btf_id(unsigned int test_num) } btf_fd[1] = bpf_btf_get_fd_by_id(info[0].id); - if (CHECK(btf_fd[1] == -1, "errno:%d", errno)) { + if (CHECK(btf_fd[1] < 0, "errno:%d", errno)) { err = -1; goto done; } @@ -4071,7 +4071,7 @@ static int test_btf_id(unsigned int test_num) create_attr.btf_value_type_id = 2; map_fd = bpf_create_map_xattr(&create_attr); - if (CHECK(map_fd == -1, "errno:%d", errno)) { + if (CHECK(map_fd < 0, "errno:%d", errno)) { err = -1; goto done; } @@ -4094,7 +4094,7 @@ static int test_btf_id(unsigned int test_num) /* Test BTF ID is removed from the kernel */ btf_fd[0] = bpf_btf_get_fd_by_id(map_info.btf_id); - if (CHECK(btf_fd[0] == -1, "errno:%d", errno)) { + if (CHECK(btf_fd[0] < 0, "errno:%d", errno)) { err = -1; goto done; } @@ -4105,7 +4105,7 @@ static int test_btf_id(unsigned int test_num) close(map_fd); map_fd = -1; btf_fd[0] = bpf_btf_get_fd_by_id(map_info.btf_id); - if (CHECK(btf_fd[0] != -1, "BTF lingers")) { + if (CHECK(btf_fd[0] >= 0, "BTF lingers")) { err = -1; goto done; } @@ -4117,11 +4117,11 @@ done: fprintf(stderr, "\n%s", btf_log_buf); free(raw_btf); - if (map_fd != -1) + if (map_fd >= 0) close(map_fd); for (i = 0; i < 2; i++) { free(user_btf[i]); - if (btf_fd[i] != -1) + if (btf_fd[i] >= 0) close(btf_fd[i]); } @@ -4166,7 +4166,7 @@ static void do_test_get_info(unsigned int test_num) btf_fd = bpf_load_btf(raw_btf, raw_btf_size, btf_log_buf, BTF_LOG_BUF_SIZE, always_log); - if (CHECK(btf_fd == -1, "errno:%d", errno)) { + if (CHECK(btf_fd <= 0, "errno:%d", errno)) { err = -1; goto done; } @@ -4212,7 +4212,7 @@ done: free(raw_btf); free(user_btf); - if (btf_fd != -1) + if (btf_fd >= 0) close(btf_fd); } @@ -4249,8 +4249,9 @@ static void do_test_file(unsigned int test_num) return; btf = btf__parse_elf(test->file, &btf_ext); - if (IS_ERR(btf)) { - if (PTR_ERR(btf) == -ENOENT) { + err = libbpf_get_error(btf); + if (err) { + if (err == -ENOENT) { printf("%s:SKIP: No ELF %s found", __func__, BTF_ELF_SEC); test__skip(); return; @@ -4263,7 +4264,8 @@ static void do_test_file(unsigned int test_num) btf_ext__free(btf_ext); obj = bpf_object__open(test->file); - if (CHECK(IS_ERR(obj), "obj: %ld", PTR_ERR(obj))) + err = libbpf_get_error(obj); + if (CHECK(err, "obj: %d", err)) return; prog = bpf_program__next(NULL, obj); @@ -4298,7 +4300,7 @@ static void do_test_file(unsigned int test_num) info_len = sizeof(struct bpf_prog_info); err = bpf_obj_get_info_by_fd(prog_fd, &info, &info_len); - if (CHECK(err == -1, "invalid get info (1st) errno:%d", errno)) { + if (CHECK(err < 0, "invalid get info (1st) errno:%d", errno)) { fprintf(stderr, "%s\n", btf_log_buf); err = -1; goto done; @@ -4330,7 +4332,7 @@ static void do_test_file(unsigned int test_num) err = bpf_obj_get_info_by_fd(prog_fd, &info, &info_len); - if (CHECK(err == -1, "invalid get info (2nd) errno:%d", errno)) { + if (CHECK(err < 0, "invalid get info (2nd) errno:%d", errno)) { fprintf(stderr, "%s\n", btf_log_buf); err = -1; goto done; @@ -4886,7 +4888,7 @@ static void do_test_pprint(int test_num) always_log); free(raw_btf); - if (CHECK(btf_fd == -1, "errno:%d", errno)) { + if (CHECK(btf_fd < 0, "errno:%d", errno)) { err = -1; goto done; } @@ -4901,7 +4903,7 @@ static void do_test_pprint(int test_num) create_attr.btf_value_type_id = test->value_type_id; map_fd = bpf_create_map_xattr(&create_attr); - if (CHECK(map_fd == -1, "errno:%d", errno)) { + if (CHECK(map_fd < 0, "errno:%d", errno)) { err = -1; goto done; } @@ -4982,7 +4984,7 @@ static void do_test_pprint(int test_num) err = check_line(expected_line, nexpected_line, sizeof(expected_line), line); - if (err == -1) + if (err < 0) goto done; } @@ -4998,7 +5000,7 @@ static void do_test_pprint(int test_num) cpu, cmapv); err = check_line(expected_line, nexpected_line, sizeof(expected_line), line); - if (err == -1) + if (err < 0) goto done; cmapv = cmapv + rounded_value_size; @@ -5036,9 +5038,9 @@ done: fprintf(stderr, "OK"); if (*btf_log_buf && (err || always_log)) fprintf(stderr, "\n%s", btf_log_buf); - if (btf_fd != -1) + if (btf_fd >= 0) close(btf_fd); - if (map_fd != -1) + if (map_fd >= 0) close(map_fd); if (pin_file) fclose(pin_file); @@ -5950,7 +5952,7 @@ static int test_get_finfo(const struct prog_info_raw_test *test, /* get necessary lens */ info_len = sizeof(struct bpf_prog_info); err = bpf_obj_get_info_by_fd(prog_fd, &info, &info_len); - if (CHECK(err == -1, "invalid get info (1st) errno:%d", errno)) { + if (CHECK(err < 0, "invalid get info (1st) errno:%d", errno)) { fprintf(stderr, "%s\n", btf_log_buf); return -1; } @@ -5980,7 +5982,7 @@ static int test_get_finfo(const struct prog_info_raw_test *test, info.func_info_rec_size = rec_size; info.func_info = ptr_to_u64(func_info); err = bpf_obj_get_info_by_fd(prog_fd, &info, &info_len); - if (CHECK(err == -1, "invalid get info (2nd) errno:%d", errno)) { + if (CHECK(err < 0, "invalid get info (2nd) errno:%d", errno)) { fprintf(stderr, "%s\n", btf_log_buf); err = -1; goto done; @@ -6044,7 +6046,7 @@ static int test_get_linfo(const struct prog_info_raw_test *test, info_len = sizeof(struct bpf_prog_info); err = bpf_obj_get_info_by_fd(prog_fd, &info, &info_len); - if (CHECK(err == -1, "err:%d errno:%d", err, errno)) { + if (CHECK(err < 0, "err:%d errno:%d", err, errno)) { err = -1; goto done; } @@ -6123,7 +6125,7 @@ static int test_get_linfo(const struct prog_info_raw_test *test, * Only recheck the info.*line_info* fields. * Other fields are not the concern of this test. */ - if (CHECK(err == -1 || + if (CHECK(err < 0 || info.nr_line_info != cnt || (jited_cnt && !info.jited_line_info) || info.nr_jited_line_info != jited_cnt || @@ -6260,7 +6262,7 @@ static void do_test_info_raw(unsigned int test_num) always_log); free(raw_btf); - if (CHECK(btf_fd == -1, "invalid btf_fd errno:%d", errno)) { + if (CHECK(btf_fd < 0, "invalid btf_fd errno:%d", errno)) { err = -1; goto done; } @@ -6273,7 +6275,8 @@ static void do_test_info_raw(unsigned int test_num) patched_linfo = patch_name_tbd(test->line_info, test->str_sec, linfo_str_off, test->str_sec_size, &linfo_size); - if (IS_ERR(patched_linfo)) { + err = libbpf_get_error(patched_linfo); + if (err) { fprintf(stderr, "error in creating raw bpf_line_info"); err = -1; goto done; @@ -6297,7 +6300,7 @@ static void do_test_info_raw(unsigned int test_num) } prog_fd = syscall(__NR_bpf, BPF_PROG_LOAD, &attr, sizeof(attr)); - err = ((prog_fd == -1) != test->expected_prog_load_failure); + err = ((prog_fd < 0) != test->expected_prog_load_failure); if (CHECK(err, "prog_fd:%d expected_prog_load_failure:%u errno:%d", prog_fd, test->expected_prog_load_failure, errno) || CHECK(test->err_str && !strstr(btf_log_buf, test->err_str), @@ -6306,7 +6309,7 @@ static void do_test_info_raw(unsigned int test_num) goto done; } - if (prog_fd == -1) + if (prog_fd < 0) goto done; err = test_get_finfo(test, prog_fd); @@ -6323,12 +6326,12 @@ done: if (*btf_log_buf && (err || always_log)) fprintf(stderr, "\n%s", btf_log_buf); - if (btf_fd != -1) + if (btf_fd >= 0) close(btf_fd); - if (prog_fd != -1) + if (prog_fd >= 0) close(prog_fd); - if (!IS_ERR(patched_linfo)) + if (!libbpf_get_error(patched_linfo)) free(patched_linfo); } @@ -6839,9 +6842,9 @@ static void do_test_dedup(unsigned int test_num) return; test_btf = btf__new((__u8 *)raw_btf, raw_btf_size); + err = libbpf_get_error(test_btf); free(raw_btf); - if (CHECK(IS_ERR(test_btf), "invalid test_btf errno:%ld", - PTR_ERR(test_btf))) { + if (CHECK(err, "invalid test_btf errno:%d", err)) { err = -1; goto done; } @@ -6853,9 +6856,9 @@ static void do_test_dedup(unsigned int test_num) if (!raw_btf) return; expect_btf = btf__new((__u8 *)raw_btf, raw_btf_size); + err = libbpf_get_error(expect_btf); free(raw_btf); - if (CHECK(IS_ERR(expect_btf), "invalid expect_btf errno:%ld", - PTR_ERR(expect_btf))) { + if (CHECK(err, "invalid expect_btf errno:%d", err)) { err = -1; goto done; } @@ -6966,10 +6969,8 @@ static void do_test_dedup(unsigned int test_num) } done: - if (!IS_ERR(test_btf)) - btf__free(test_btf); - if (!IS_ERR(expect_btf)) - btf__free(expect_btf); + btf__free(test_btf); + btf__free(expect_btf); } void test_btf(void) diff --git a/tools/testing/selftests/bpf/prog_tests/btf_dump.c b/tools/testing/selftests/bpf/prog_tests/btf_dump.c index 5e129dc2073c..1b90e684ff13 100644 --- a/tools/testing/selftests/bpf/prog_tests/btf_dump.c +++ b/tools/testing/selftests/bpf/prog_tests/btf_dump.c @@ -32,8 +32,9 @@ static int btf_dump_all_types(const struct btf *btf, int err = 0, id; d = btf_dump__new(btf, NULL, opts, btf_dump_printf); - if (IS_ERR(d)) - return PTR_ERR(d); + err = libbpf_get_error(d); + if (err) + return err; for (id = 1; id <= type_cnt; id++) { err = btf_dump__dump_type(d, id); @@ -56,8 +57,7 @@ static int test_btf_dump_case(int n, struct btf_dump_test_case *t) snprintf(test_file, sizeof(test_file), "%s.o", t->file); btf = btf__parse_elf(test_file, NULL); - if (CHECK(IS_ERR(btf), "btf_parse_elf", - "failed to load test BTF: %ld\n", PTR_ERR(btf))) { + if (!ASSERT_OK_PTR(btf, "btf_parse_elf")) { err = -PTR_ERR(btf); btf = NULL; goto done; diff --git a/tools/testing/selftests/bpf/prog_tests/btf_write.c b/tools/testing/selftests/bpf/prog_tests/btf_write.c index f36da15b134f..022c7d89d6f4 100644 --- a/tools/testing/selftests/bpf/prog_tests/btf_write.c +++ b/tools/testing/selftests/bpf/prog_tests/btf_write.c @@ -4,8 +4,6 @@ #include <bpf/btf.h> #include "btf_helpers.h" -static int duration = 0; - void test_btf_write() { const struct btf_var_secinfo *vi; const struct btf_type *t; @@ -16,7 +14,7 @@ void test_btf_write() { int id, err, str_off; btf = btf__new_empty(); - if (CHECK(IS_ERR(btf), "new_empty", "failed: %ld\n", PTR_ERR(btf))) + if (!ASSERT_OK_PTR(btf, "new_empty")) return; str_off = btf__find_str(btf, "int"); diff --git a/tools/testing/selftests/bpf/prog_tests/cg_storage_multi.c b/tools/testing/selftests/bpf/prog_tests/cg_storage_multi.c index 643dfa35419c..876be0ecb654 100644 --- a/tools/testing/selftests/bpf/prog_tests/cg_storage_multi.c +++ b/tools/testing/selftests/bpf/prog_tests/cg_storage_multi.c @@ -102,8 +102,7 @@ static void test_egress_only(int parent_cgroup_fd, int child_cgroup_fd) */ parent_link = bpf_program__attach_cgroup(obj->progs.egress, parent_cgroup_fd); - if (CHECK(IS_ERR(parent_link), "parent-cg-attach", - "err %ld", PTR_ERR(parent_link))) + if (!ASSERT_OK_PTR(parent_link, "parent-cg-attach")) goto close_bpf_object; err = connect_send(CHILD_CGROUP); if (CHECK(err, "first-connect-send", "errno %d", errno)) @@ -126,8 +125,7 @@ static void test_egress_only(int parent_cgroup_fd, int child_cgroup_fd) */ child_link = bpf_program__attach_cgroup(obj->progs.egress, child_cgroup_fd); - if (CHECK(IS_ERR(child_link), "child-cg-attach", - "err %ld", PTR_ERR(child_link))) + if (!ASSERT_OK_PTR(child_link, "child-cg-attach")) goto close_bpf_object; err = connect_send(CHILD_CGROUP); if (CHECK(err, "second-connect-send", "errno %d", errno)) @@ -147,10 +145,8 @@ static void test_egress_only(int parent_cgroup_fd, int child_cgroup_fd) goto close_bpf_object; close_bpf_object: - if (!IS_ERR(parent_link)) - bpf_link__destroy(parent_link); - if (!IS_ERR(child_link)) - bpf_link__destroy(child_link); + bpf_link__destroy(parent_link); + bpf_link__destroy(child_link); cg_storage_multi_egress_only__destroy(obj); } @@ -176,18 +172,15 @@ static void test_isolated(int parent_cgroup_fd, int child_cgroup_fd) */ parent_egress1_link = bpf_program__attach_cgroup(obj->progs.egress1, parent_cgroup_fd); - if (CHECK(IS_ERR(parent_egress1_link), "parent-egress1-cg-attach", - "err %ld", PTR_ERR(parent_egress1_link))) + if (!ASSERT_OK_PTR(parent_egress1_link, "parent-egress1-cg-attach")) goto close_bpf_object; parent_egress2_link = bpf_program__attach_cgroup(obj->progs.egress2, parent_cgroup_fd); - if (CHECK(IS_ERR(parent_egress2_link), "parent-egress2-cg-attach", - "err %ld", PTR_ERR(parent_egress2_link))) + if (!ASSERT_OK_PTR(parent_egress2_link, "parent-egress2-cg-attach")) goto close_bpf_object; parent_ingress_link = bpf_program__attach_cgroup(obj->progs.ingress, parent_cgroup_fd); - if (CHECK(IS_ERR(parent_ingress_link), "parent-ingress-cg-attach", - "err %ld", PTR_ERR(parent_ingress_link))) + if (!ASSERT_OK_PTR(parent_ingress_link, "parent-ingress-cg-attach")) goto close_bpf_object; err = connect_send(CHILD_CGROUP); if (CHECK(err, "first-connect-send", "errno %d", errno)) @@ -221,18 +214,15 @@ static void test_isolated(int parent_cgroup_fd, int child_cgroup_fd) */ child_egress1_link = bpf_program__attach_cgroup(obj->progs.egress1, child_cgroup_fd); - if (CHECK(IS_ERR(child_egress1_link), "child-egress1-cg-attach", - "err %ld", PTR_ERR(child_egress1_link))) + if (!ASSERT_OK_PTR(child_egress1_link, "child-egress1-cg-attach")) goto close_bpf_object; child_egress2_link = bpf_program__attach_cgroup(obj->progs.egress2, child_cgroup_fd); - if (CHECK(IS_ERR(child_egress2_link), "child-egress2-cg-attach", - "err %ld", PTR_ERR(child_egress2_link))) + if (!ASSERT_OK_PTR(child_egress2_link, "child-egress2-cg-attach")) goto close_bpf_object; child_ingress_link = bpf_program__attach_cgroup(obj->progs.ingress, child_cgroup_fd); - if (CHECK(IS_ERR(child_ingress_link), "child-ingress-cg-attach", - "err %ld", PTR_ERR(child_ingress_link))) + if (!ASSERT_OK_PTR(child_ingress_link, "child-ingress-cg-attach")) goto close_bpf_object; err = connect_send(CHILD_CGROUP); if (CHECK(err, "second-connect-send", "errno %d", errno)) @@ -264,18 +254,12 @@ static void test_isolated(int parent_cgroup_fd, int child_cgroup_fd) goto close_bpf_object; close_bpf_object: - if (!IS_ERR(parent_egress1_link)) - bpf_link__destroy(parent_egress1_link); - if (!IS_ERR(parent_egress2_link)) - bpf_link__destroy(parent_egress2_link); - if (!IS_ERR(parent_ingress_link)) - bpf_link__destroy(parent_ingress_link); - if (!IS_ERR(child_egress1_link)) - bpf_link__destroy(child_egress1_link); - if (!IS_ERR(child_egress2_link)) - bpf_link__destroy(child_egress2_link); - if (!IS_ERR(child_ingress_link)) - bpf_link__destroy(child_ingress_link); + bpf_link__destroy(parent_egress1_link); + bpf_link__destroy(parent_egress2_link); + bpf_link__destroy(parent_ingress_link); + bpf_link__destroy(child_egress1_link); + bpf_link__destroy(child_egress2_link); + bpf_link__destroy(child_ingress_link); cg_storage_multi_isolated__destroy(obj); } @@ -301,18 +285,15 @@ static void test_shared(int parent_cgroup_fd, int child_cgroup_fd) */ parent_egress1_link = bpf_program__attach_cgroup(obj->progs.egress1, parent_cgroup_fd); - if (CHECK(IS_ERR(parent_egress1_link), "parent-egress1-cg-attach", - "err %ld", PTR_ERR(parent_egress1_link))) + if (!ASSERT_OK_PTR(parent_egress1_link, "parent-egress1-cg-attach")) goto close_bpf_object; parent_egress2_link = bpf_program__attach_cgroup(obj->progs.egress2, parent_cgroup_fd); - if (CHECK(IS_ERR(parent_egress2_link), "parent-egress2-cg-attach", - "err %ld", PTR_ERR(parent_egress2_link))) + if (!ASSERT_OK_PTR(parent_egress2_link, "parent-egress2-cg-attach")) goto close_bpf_object; parent_ingress_link = bpf_program__attach_cgroup(obj->progs.ingress, parent_cgroup_fd); - if (CHECK(IS_ERR(parent_ingress_link), "parent-ingress-cg-attach", - "err %ld", PTR_ERR(parent_ingress_link))) + if (!ASSERT_OK_PTR(parent_ingress_link, "parent-ingress-cg-attach")) goto close_bpf_object; err = connect_send(CHILD_CGROUP); if (CHECK(err, "first-connect-send", "errno %d", errno)) @@ -338,18 +319,15 @@ static void test_shared(int parent_cgroup_fd, int child_cgroup_fd) */ child_egress1_link = bpf_program__attach_cgroup(obj->progs.egress1, child_cgroup_fd); - if (CHECK(IS_ERR(child_egress1_link), "child-egress1-cg-attach", - "err %ld", PTR_ERR(child_egress1_link))) + if (!ASSERT_OK_PTR(child_egress1_link, "child-egress1-cg-attach")) goto close_bpf_object; child_egress2_link = bpf_program__attach_cgroup(obj->progs.egress2, child_cgroup_fd); - if (CHECK(IS_ERR(child_egress2_link), "child-egress2-cg-attach", - "err %ld", PTR_ERR(child_egress2_link))) + if (!ASSERT_OK_PTR(child_egress2_link, "child-egress2-cg-attach")) goto close_bpf_object; child_ingress_link = bpf_program__attach_cgroup(obj->progs.ingress, child_cgroup_fd); - if (CHECK(IS_ERR(child_ingress_link), "child-ingress-cg-attach", - "err %ld", PTR_ERR(child_ingress_link))) + if (!ASSERT_OK_PTR(child_ingress_link, "child-ingress-cg-attach")) goto close_bpf_object; err = connect_send(CHILD_CGROUP); if (CHECK(err, "second-connect-send", "errno %d", errno)) @@ -375,18 +353,12 @@ static void test_shared(int parent_cgroup_fd, int child_cgroup_fd) goto close_bpf_object; close_bpf_object: - if (!IS_ERR(parent_egress1_link)) - bpf_link__destroy(parent_egress1_link); - if (!IS_ERR(parent_egress2_link)) - bpf_link__destroy(parent_egress2_link); - if (!IS_ERR(parent_ingress_link)) - bpf_link__destroy(parent_ingress_link); - if (!IS_ERR(child_egress1_link)) - bpf_link__destroy(child_egress1_link); - if (!IS_ERR(child_egress2_link)) - bpf_link__destroy(child_egress2_link); - if (!IS_ERR(child_ingress_link)) - bpf_link__destroy(child_ingress_link); + bpf_link__destroy(parent_egress1_link); + bpf_link__destroy(parent_egress2_link); + bpf_link__destroy(parent_ingress_link); + bpf_link__destroy(child_egress1_link); + bpf_link__destroy(child_egress2_link); + bpf_link__destroy(child_ingress_link); cg_storage_multi_shared__destroy(obj); } diff --git a/tools/testing/selftests/bpf/prog_tests/cgroup_attach_multi.c b/tools/testing/selftests/bpf/prog_tests/cgroup_attach_multi.c index 0a1fc9816cef..20bb8831dda6 100644 --- a/tools/testing/selftests/bpf/prog_tests/cgroup_attach_multi.c +++ b/tools/testing/selftests/bpf/prog_tests/cgroup_attach_multi.c @@ -167,7 +167,7 @@ void test_cgroup_attach_multi(void) prog_cnt = 2; CHECK_FAIL(bpf_prog_query(cg5, BPF_CGROUP_INET_EGRESS, BPF_F_QUERY_EFFECTIVE, &attach_flags, - prog_ids, &prog_cnt) != -1); + prog_ids, &prog_cnt) >= 0); CHECK_FAIL(errno != ENOSPC); CHECK_FAIL(prog_cnt != 4); /* check that prog_ids are returned even when buffer is too small */ diff --git a/tools/testing/selftests/bpf/prog_tests/cgroup_link.c b/tools/testing/selftests/bpf/prog_tests/cgroup_link.c index 736796e56ed1..9091524131d6 100644 --- a/tools/testing/selftests/bpf/prog_tests/cgroup_link.c +++ b/tools/testing/selftests/bpf/prog_tests/cgroup_link.c @@ -65,8 +65,7 @@ void test_cgroup_link(void) for (i = 0; i < cg_nr; i++) { links[i] = bpf_program__attach_cgroup(skel->progs.egress, cgs[i].fd); - if (CHECK(IS_ERR(links[i]), "cg_attach", "i: %d, err: %ld\n", - i, PTR_ERR(links[i]))) + if (!ASSERT_OK_PTR(links[i], "cg_attach")) goto cleanup; } @@ -121,8 +120,7 @@ void test_cgroup_link(void) links[last_cg] = bpf_program__attach_cgroup(skel->progs.egress, cgs[last_cg].fd); - if (CHECK(IS_ERR(links[last_cg]), "cg_attach", "err: %ld\n", - PTR_ERR(links[last_cg]))) + if (!ASSERT_OK_PTR(links[last_cg], "cg_attach")) goto cleanup; ping_and_check(cg_nr + 1, 0); @@ -147,7 +145,7 @@ void test_cgroup_link(void) /* attempt to mix in with multi-attach bpf_link */ tmp_link = bpf_program__attach_cgroup(skel->progs.egress, cgs[last_cg].fd); - if (CHECK(!IS_ERR(tmp_link), "cg_attach_fail", "unexpected success!\n")) { + if (!ASSERT_ERR_PTR(tmp_link, "cg_attach_fail")) { bpf_link__destroy(tmp_link); goto cleanup; } @@ -165,8 +163,7 @@ void test_cgroup_link(void) /* attach back link-based one */ links[last_cg] = bpf_program__attach_cgroup(skel->progs.egress, cgs[last_cg].fd); - if (CHECK(IS_ERR(links[last_cg]), "cg_attach", "err: %ld\n", - PTR_ERR(links[last_cg]))) + if (!ASSERT_OK_PTR(links[last_cg], "cg_attach")) goto cleanup; ping_and_check(cg_nr, 0); @@ -249,8 +246,7 @@ cleanup: BPF_CGROUP_INET_EGRESS); for (i = 0; i < cg_nr; i++) { - if (!IS_ERR(links[i])) - bpf_link__destroy(links[i]); + bpf_link__destroy(links[i]); } test_cgroup_link__destroy(skel); diff --git a/tools/testing/selftests/bpf/prog_tests/cgroup_skb_sk_lookup.c b/tools/testing/selftests/bpf/prog_tests/cgroup_skb_sk_lookup.c index 464edc1c1708..b9dc4ec655b5 100644 --- a/tools/testing/selftests/bpf/prog_tests/cgroup_skb_sk_lookup.c +++ b/tools/testing/selftests/bpf/prog_tests/cgroup_skb_sk_lookup.c @@ -60,7 +60,7 @@ static void run_cgroup_bpf_test(const char *cg_path, int out_sk) goto cleanup; link = bpf_program__attach_cgroup(skel->progs.ingress_lookup, cgfd); - if (CHECK(IS_ERR(link), "cgroup_attach", "err: %ld\n", PTR_ERR(link))) + if (!ASSERT_OK_PTR(link, "cgroup_attach")) goto cleanup; run_lookup_test(&skel->bss->g_serv_port, out_sk); diff --git a/tools/testing/selftests/bpf/prog_tests/check_mtu.c b/tools/testing/selftests/bpf/prog_tests/check_mtu.c index b62a39315336..012068f33a0a 100644 --- a/tools/testing/selftests/bpf/prog_tests/check_mtu.c +++ b/tools/testing/selftests/bpf/prog_tests/check_mtu.c @@ -53,7 +53,7 @@ static void test_check_mtu_xdp_attach(void) prog = skel->progs.xdp_use_helper_basic; link = bpf_program__attach_xdp(prog, IFINDEX_LO); - if (CHECK(IS_ERR(link), "link_attach", "failed: %ld\n", PTR_ERR(link))) + if (!ASSERT_OK_PTR(link, "link_attach")) goto out; skel->links.xdp_use_helper_basic = link; diff --git a/tools/testing/selftests/bpf/prog_tests/core_reloc.c b/tools/testing/selftests/bpf/prog_tests/core_reloc.c index 607710826dca..d02e064c535f 100644 --- a/tools/testing/selftests/bpf/prog_tests/core_reloc.c +++ b/tools/testing/selftests/bpf/prog_tests/core_reloc.c @@ -369,8 +369,7 @@ static int setup_type_id_case_local(struct core_reloc_test_case *test) const char *name; int i; - if (CHECK(IS_ERR(local_btf), "local_btf", "failed: %ld\n", PTR_ERR(local_btf)) || - CHECK(IS_ERR(targ_btf), "targ_btf", "failed: %ld\n", PTR_ERR(targ_btf))) { + if (!ASSERT_OK_PTR(local_btf, "local_btf") || !ASSERT_OK_PTR(targ_btf, "targ_btf")) { btf__free(local_btf); btf__free(targ_btf); return -EINVAL; @@ -848,8 +847,7 @@ void test_core_reloc(void) } obj = bpf_object__open_file(test_case->bpf_obj_file, NULL); - if (CHECK(IS_ERR(obj), "obj_open", "failed to open '%s': %ld\n", - test_case->bpf_obj_file, PTR_ERR(obj))) + if (!ASSERT_OK_PTR(obj, "obj_open")) continue; probe_name = "raw_tracepoint/sys_enter"; @@ -899,8 +897,7 @@ void test_core_reloc(void) data->my_pid_tgid = my_pid_tgid; link = bpf_program__attach_raw_tracepoint(prog, tp_name); - if (CHECK(IS_ERR(link), "attach_raw_tp", "err %ld\n", - PTR_ERR(link))) + if (!ASSERT_OK_PTR(link, "attach_raw_tp")) goto cleanup; /* trigger test run */ @@ -941,10 +938,8 @@ cleanup: CHECK_FAIL(munmap(mmap_data, mmap_sz)); mmap_data = NULL; } - if (!IS_ERR_OR_NULL(link)) { - bpf_link__destroy(link); - link = NULL; - } + bpf_link__destroy(link); + link = NULL; bpf_object__close(obj); } } diff --git a/tools/testing/selftests/bpf/prog_tests/fentry_fexit.c b/tools/testing/selftests/bpf/prog_tests/fentry_fexit.c index 109d0345a2be..91154c2ba256 100644 --- a/tools/testing/selftests/bpf/prog_tests/fentry_fexit.c +++ b/tools/testing/selftests/bpf/prog_tests/fentry_fexit.c @@ -1,8 +1,8 @@ // SPDX-License-Identifier: GPL-2.0 /* Copyright (c) 2019 Facebook */ #include <test_progs.h> -#include "fentry_test.skel.h" -#include "fexit_test.skel.h" +#include "fentry_test.lskel.h" +#include "fexit_test.lskel.h" void test_fentry_fexit(void) { @@ -26,7 +26,7 @@ void test_fentry_fexit(void) if (CHECK(err, "fexit_attach", "fexit attach failed: %d\n", err)) goto close_prog; - prog_fd = bpf_program__fd(fexit_skel->progs.test1); + prog_fd = fexit_skel->progs.test1.prog_fd; err = bpf_prog_test_run(prog_fd, 1, NULL, 0, NULL, NULL, &retval, &duration); CHECK(err || retval, "ipv6", diff --git a/tools/testing/selftests/bpf/prog_tests/fentry_test.c b/tools/testing/selftests/bpf/prog_tests/fentry_test.c index 7cb111b11995..174c89e7456e 100644 --- a/tools/testing/selftests/bpf/prog_tests/fentry_test.c +++ b/tools/testing/selftests/bpf/prog_tests/fentry_test.c @@ -1,13 +1,13 @@ // SPDX-License-Identifier: GPL-2.0 /* Copyright (c) 2019 Facebook */ #include <test_progs.h> -#include "fentry_test.skel.h" +#include "fentry_test.lskel.h" static int fentry_test(struct fentry_test *fentry_skel) { int err, prog_fd, i; __u32 duration = 0, retval; - struct bpf_link *link; + int link_fd; __u64 *result; err = fentry_test__attach(fentry_skel); @@ -15,11 +15,11 @@ static int fentry_test(struct fentry_test *fentry_skel) return err; /* Check that already linked program can't be attached again. */ - link = bpf_program__attach(fentry_skel->progs.test1); - if (!ASSERT_ERR_PTR(link, "fentry_attach_link")) + link_fd = fentry_test__test1__attach(fentry_skel); + if (!ASSERT_LT(link_fd, 0, "fentry_attach_link")) return -1; - prog_fd = bpf_program__fd(fentry_skel->progs.test1); + prog_fd = fentry_skel->progs.test1.prog_fd; err = bpf_prog_test_run(prog_fd, 1, NULL, 0, NULL, NULL, &retval, &duration); ASSERT_OK(err, "test_run"); diff --git a/tools/testing/selftests/bpf/prog_tests/fexit_bpf2bpf.c b/tools/testing/selftests/bpf/prog_tests/fexit_bpf2bpf.c index 63990842d20f..73b4c76e6b86 100644 --- a/tools/testing/selftests/bpf/prog_tests/fexit_bpf2bpf.c +++ b/tools/testing/selftests/bpf/prog_tests/fexit_bpf2bpf.c @@ -146,10 +146,8 @@ static void test_fexit_bpf2bpf_common(const char *obj_file, close_prog: for (i = 0; i < prog_cnt; i++) - if (!IS_ERR_OR_NULL(link[i])) - bpf_link__destroy(link[i]); - if (!IS_ERR_OR_NULL(obj)) - bpf_object__close(obj); + bpf_link__destroy(link[i]); + bpf_object__close(obj); bpf_object__close(tgt_obj); free(link); free(prog); @@ -231,7 +229,7 @@ static int test_second_attach(struct bpf_object *obj) return err; link = bpf_program__attach_freplace(prog, tgt_fd, tgt_name); - if (CHECK(IS_ERR(link), "second_link", "failed to attach second link prog_fd %d tgt_fd %d\n", bpf_program__fd(prog), tgt_fd)) + if (!ASSERT_OK_PTR(link, "second_link")) goto out; err = bpf_prog_test_run(tgt_fd, 1, &pkt_v6, sizeof(pkt_v6), @@ -283,9 +281,7 @@ static void test_fmod_ret_freplace(void) opts.attach_prog_fd = pkt_fd; freplace_obj = bpf_object__open_file(freplace_name, &opts); - if (CHECK(IS_ERR_OR_NULL(freplace_obj), "freplace_obj_open", - "failed to open %s: %ld\n", freplace_name, - PTR_ERR(freplace_obj))) + if (!ASSERT_OK_PTR(freplace_obj, "freplace_obj_open")) goto out; err = bpf_object__load(freplace_obj); @@ -294,14 +290,12 @@ static void test_fmod_ret_freplace(void) prog = bpf_program__next(NULL, freplace_obj); freplace_link = bpf_program__attach_trace(prog); - if (CHECK(IS_ERR(freplace_link), "freplace_attach_trace", "failed to link\n")) + if (!ASSERT_OK_PTR(freplace_link, "freplace_attach_trace")) goto out; opts.attach_prog_fd = bpf_program__fd(prog); fmod_obj = bpf_object__open_file(fmod_ret_name, &opts); - if (CHECK(IS_ERR_OR_NULL(fmod_obj), "fmod_obj_open", - "failed to open %s: %ld\n", fmod_ret_name, - PTR_ERR(fmod_obj))) + if (!ASSERT_OK_PTR(fmod_obj, "fmod_obj_open")) goto out; err = bpf_object__load(fmod_obj); @@ -350,9 +344,7 @@ static void test_obj_load_failure_common(const char *obj_file, ); obj = bpf_object__open_file(obj_file, &opts); - if (CHECK(IS_ERR_OR_NULL(obj), "obj_open", - "failed to open %s: %ld\n", obj_file, - PTR_ERR(obj))) + if (!ASSERT_OK_PTR(obj, "obj_open")) goto close_prog; /* It should fail to load the program */ @@ -361,8 +353,7 @@ static void test_obj_load_failure_common(const char *obj_file, goto close_prog; close_prog: - if (!IS_ERR_OR_NULL(obj)) - bpf_object__close(obj); + bpf_object__close(obj); bpf_object__close(pkt_obj); } diff --git a/tools/testing/selftests/bpf/prog_tests/fexit_sleep.c b/tools/testing/selftests/bpf/prog_tests/fexit_sleep.c index ccc7e8a34ab6..4e7f4b42ea29 100644 --- a/tools/testing/selftests/bpf/prog_tests/fexit_sleep.c +++ b/tools/testing/selftests/bpf/prog_tests/fexit_sleep.c @@ -6,7 +6,7 @@ #include <time.h> #include <sys/mman.h> #include <sys/syscall.h> -#include "fexit_sleep.skel.h" +#include "fexit_sleep.lskel.h" static int do_sleep(void *skel) { @@ -58,8 +58,8 @@ void test_fexit_sleep(void) * waiting for percpu_ref_kill to confirm). The other one * will be freed quickly. */ - close(bpf_program__fd(fexit_skel->progs.nanosleep_fentry)); - close(bpf_program__fd(fexit_skel->progs.nanosleep_fexit)); + close(fexit_skel->progs.nanosleep_fentry.prog_fd); + close(fexit_skel->progs.nanosleep_fexit.prog_fd); fexit_sleep__detach(fexit_skel); /* kill the thread to unwind sys_nanosleep stack through the trampoline */ diff --git a/tools/testing/selftests/bpf/prog_tests/fexit_test.c b/tools/testing/selftests/bpf/prog_tests/fexit_test.c index 6792e41f7f69..af3dba726701 100644 --- a/tools/testing/selftests/bpf/prog_tests/fexit_test.c +++ b/tools/testing/selftests/bpf/prog_tests/fexit_test.c @@ -1,13 +1,13 @@ // SPDX-License-Identifier: GPL-2.0 /* Copyright (c) 2019 Facebook */ #include <test_progs.h> -#include "fexit_test.skel.h" +#include "fexit_test.lskel.h" static int fexit_test(struct fexit_test *fexit_skel) { int err, prog_fd, i; __u32 duration = 0, retval; - struct bpf_link *link; + int link_fd; __u64 *result; err = fexit_test__attach(fexit_skel); @@ -15,11 +15,11 @@ static int fexit_test(struct fexit_test *fexit_skel) return err; /* Check that already linked program can't be attached again. */ - link = bpf_program__attach(fexit_skel->progs.test1); - if (!ASSERT_ERR_PTR(link, "fexit_attach_link")) + link_fd = fexit_test__test1__attach(fexit_skel); + if (!ASSERT_LT(link_fd, 0, "fexit_attach_link")) return -1; - prog_fd = bpf_program__fd(fexit_skel->progs.test1); + prog_fd = fexit_skel->progs.test1.prog_fd; err = bpf_prog_test_run(prog_fd, 1, NULL, 0, NULL, NULL, &retval, &duration); ASSERT_OK(err, "test_run"); diff --git a/tools/testing/selftests/bpf/prog_tests/flow_dissector.c b/tools/testing/selftests/bpf/prog_tests/flow_dissector.c index cd6dc80edf18..225714f71ac6 100644 --- a/tools/testing/selftests/bpf/prog_tests/flow_dissector.c +++ b/tools/testing/selftests/bpf/prog_tests/flow_dissector.c @@ -541,7 +541,7 @@ static void test_skb_less_link_create(struct bpf_flow *skel, int tap_fd) return; link = bpf_program__attach_netns(skel->progs._dissect, net_fd); - if (CHECK(IS_ERR(link), "attach_netns", "err %ld\n", PTR_ERR(link))) + if (!ASSERT_OK_PTR(link, "attach_netns")) goto out_close; run_tests_skb_less(tap_fd, skel->maps.last_dissection); diff --git a/tools/testing/selftests/bpf/prog_tests/flow_dissector_reattach.c b/tools/testing/selftests/bpf/prog_tests/flow_dissector_reattach.c index 172c586b6996..3931ede5c534 100644 --- a/tools/testing/selftests/bpf/prog_tests/flow_dissector_reattach.c +++ b/tools/testing/selftests/bpf/prog_tests/flow_dissector_reattach.c @@ -134,9 +134,9 @@ static void test_link_create_link_create(int netns, int prog1, int prog2) /* Expect failure creating link when another link exists */ errno = 0; link2 = bpf_link_create(prog2, netns, BPF_FLOW_DISSECTOR, &opts); - if (CHECK_FAIL(link2 != -1 || errno != E2BIG)) + if (CHECK_FAIL(link2 >= 0 || errno != E2BIG)) perror("bpf_prog_attach(prog2) expected E2BIG"); - if (link2 != -1) + if (link2 >= 0) close(link2); CHECK_FAIL(query_attached_prog_id(netns) != query_prog_id(prog1)); @@ -159,9 +159,9 @@ static void test_prog_attach_link_create(int netns, int prog1, int prog2) /* Expect failure creating link when prog attached */ errno = 0; link = bpf_link_create(prog2, netns, BPF_FLOW_DISSECTOR, &opts); - if (CHECK_FAIL(link != -1 || errno != EEXIST)) + if (CHECK_FAIL(link >= 0 || errno != EEXIST)) perror("bpf_link_create(prog2) expected EEXIST"); - if (link != -1) + if (link >= 0) close(link); CHECK_FAIL(query_attached_prog_id(netns) != query_prog_id(prog1)); @@ -623,7 +623,7 @@ static void run_tests(int netns) } out_close: for (i = 0; i < ARRAY_SIZE(progs); i++) { - if (progs[i] != -1) + if (progs[i] >= 0) CHECK_FAIL(close(progs[i])); } } diff --git a/tools/testing/selftests/bpf/prog_tests/get_stack_raw_tp.c b/tools/testing/selftests/bpf/prog_tests/get_stack_raw_tp.c index 925722217edf..522237aa4470 100644 --- a/tools/testing/selftests/bpf/prog_tests/get_stack_raw_tp.c +++ b/tools/testing/selftests/bpf/prog_tests/get_stack_raw_tp.c @@ -121,12 +121,12 @@ void test_get_stack_raw_tp(void) goto close_prog; link = bpf_program__attach_raw_tracepoint(prog, "sys_enter"); - if (CHECK(IS_ERR(link), "attach_raw_tp", "err %ld\n", PTR_ERR(link))) + if (!ASSERT_OK_PTR(link, "attach_raw_tp")) goto close_prog; pb_opts.sample_cb = get_stack_print_output; pb = perf_buffer__new(bpf_map__fd(map), 8, &pb_opts); - if (CHECK(IS_ERR(pb), "perf_buf__new", "err %ld\n", PTR_ERR(pb))) + if (!ASSERT_OK_PTR(pb, "perf_buf__new")) goto close_prog; /* trigger some syscall action */ @@ -141,9 +141,7 @@ void test_get_stack_raw_tp(void) } close_prog: - if (!IS_ERR_OR_NULL(link)) - bpf_link__destroy(link); - if (!IS_ERR_OR_NULL(pb)) - perf_buffer__free(pb); + bpf_link__destroy(link); + perf_buffer__free(pb); bpf_object__close(obj); } diff --git a/tools/testing/selftests/bpf/prog_tests/get_stackid_cannot_attach.c b/tools/testing/selftests/bpf/prog_tests/get_stackid_cannot_attach.c index d884b2ed5bc5..8d5a6023a1bb 100644 --- a/tools/testing/selftests/bpf/prog_tests/get_stackid_cannot_attach.c +++ b/tools/testing/selftests/bpf/prog_tests/get_stackid_cannot_attach.c @@ -48,8 +48,7 @@ void test_get_stackid_cannot_attach(void) skel->links.oncpu = bpf_program__attach_perf_event(skel->progs.oncpu, pmu_fd); - CHECK(!IS_ERR(skel->links.oncpu), "attach_perf_event_no_callchain", - "should have failed\n"); + ASSERT_ERR_PTR(skel->links.oncpu, "attach_perf_event_no_callchain"); close(pmu_fd); /* add PERF_SAMPLE_CALLCHAIN, attach should succeed */ @@ -65,8 +64,7 @@ void test_get_stackid_cannot_attach(void) skel->links.oncpu = bpf_program__attach_perf_event(skel->progs.oncpu, pmu_fd); - CHECK(IS_ERR(skel->links.oncpu), "attach_perf_event_callchain", - "err: %ld\n", PTR_ERR(skel->links.oncpu)); + ASSERT_OK_PTR(skel->links.oncpu, "attach_perf_event_callchain"); close(pmu_fd); /* add exclude_callchain_kernel, attach should fail */ @@ -82,8 +80,7 @@ void test_get_stackid_cannot_attach(void) skel->links.oncpu = bpf_program__attach_perf_event(skel->progs.oncpu, pmu_fd); - CHECK(!IS_ERR(skel->links.oncpu), "attach_perf_event_exclude_callchain_kernel", - "should have failed\n"); + ASSERT_ERR_PTR(skel->links.oncpu, "attach_perf_event_exclude_callchain_kernel"); close(pmu_fd); cleanup: diff --git a/tools/testing/selftests/bpf/prog_tests/hashmap.c b/tools/testing/selftests/bpf/prog_tests/hashmap.c index 428d488830c6..4747ab18f97f 100644 --- a/tools/testing/selftests/bpf/prog_tests/hashmap.c +++ b/tools/testing/selftests/bpf/prog_tests/hashmap.c @@ -48,8 +48,7 @@ static void test_hashmap_generic(void) struct hashmap *map; map = hashmap__new(hash_fn, equal_fn, NULL); - if (CHECK(IS_ERR(map), "hashmap__new", - "failed to create map: %ld\n", PTR_ERR(map))) + if (!ASSERT_OK_PTR(map, "hashmap__new")) return; for (i = 0; i < ELEM_CNT; i++) { @@ -267,8 +266,7 @@ static void test_hashmap_multimap(void) /* force collisions */ map = hashmap__new(collision_hash_fn, equal_fn, NULL); - if (CHECK(IS_ERR(map), "hashmap__new", - "failed to create map: %ld\n", PTR_ERR(map))) + if (!ASSERT_OK_PTR(map, "hashmap__new")) return; /* set up multimap: @@ -339,8 +337,7 @@ static void test_hashmap_empty() /* force collisions */ map = hashmap__new(hash_fn, equal_fn, NULL); - if (CHECK(IS_ERR(map), "hashmap__new", - "failed to create map: %ld\n", PTR_ERR(map))) + if (!ASSERT_OK_PTR(map, "hashmap__new")) goto cleanup; if (CHECK(hashmap__size(map) != 0, "hashmap__size", diff --git a/tools/testing/selftests/bpf/prog_tests/kfree_skb.c b/tools/testing/selftests/bpf/prog_tests/kfree_skb.c index d65107919998..ddfb6bf97152 100644 --- a/tools/testing/selftests/bpf/prog_tests/kfree_skb.c +++ b/tools/testing/selftests/bpf/prog_tests/kfree_skb.c @@ -97,15 +97,13 @@ void test_kfree_skb(void) goto close_prog; link = bpf_program__attach_raw_tracepoint(prog, NULL); - if (CHECK(IS_ERR(link), "attach_raw_tp", "err %ld\n", PTR_ERR(link))) + if (!ASSERT_OK_PTR(link, "attach_raw_tp")) goto close_prog; link_fentry = bpf_program__attach_trace(fentry); - if (CHECK(IS_ERR(link_fentry), "attach fentry", "err %ld\n", - PTR_ERR(link_fentry))) + if (!ASSERT_OK_PTR(link_fentry, "attach fentry")) goto close_prog; link_fexit = bpf_program__attach_trace(fexit); - if (CHECK(IS_ERR(link_fexit), "attach fexit", "err %ld\n", - PTR_ERR(link_fexit))) + if (!ASSERT_OK_PTR(link_fexit, "attach fexit")) goto close_prog; perf_buf_map = bpf_object__find_map_by_name(obj2, "perf_buf_map"); @@ -116,7 +114,7 @@ void test_kfree_skb(void) pb_opts.sample_cb = on_sample; pb_opts.ctx = &passed; pb = perf_buffer__new(bpf_map__fd(perf_buf_map), 1, &pb_opts); - if (CHECK(IS_ERR(pb), "perf_buf__new", "err %ld\n", PTR_ERR(pb))) + if (!ASSERT_OK_PTR(pb, "perf_buf__new")) goto close_prog; memcpy(skb.cb, &cb, sizeof(cb)); @@ -144,12 +142,9 @@ void test_kfree_skb(void) CHECK_FAIL(!test_ok[0] || !test_ok[1]); close_prog: perf_buffer__free(pb); - if (!IS_ERR_OR_NULL(link)) - bpf_link__destroy(link); - if (!IS_ERR_OR_NULL(link_fentry)) - bpf_link__destroy(link_fentry); - if (!IS_ERR_OR_NULL(link_fexit)) - bpf_link__destroy(link_fexit); + bpf_link__destroy(link); + bpf_link__destroy(link_fentry); + bpf_link__destroy(link_fexit); bpf_object__close(obj); bpf_object__close(obj2); } diff --git a/tools/testing/selftests/bpf/prog_tests/kfunc_call.c b/tools/testing/selftests/bpf/prog_tests/kfunc_call.c index 7fc0951ee75f..30a7b9b837bf 100644 --- a/tools/testing/selftests/bpf/prog_tests/kfunc_call.c +++ b/tools/testing/selftests/bpf/prog_tests/kfunc_call.c @@ -2,7 +2,7 @@ /* Copyright (c) 2021 Facebook */ #include <test_progs.h> #include <network_helpers.h> -#include "kfunc_call_test.skel.h" +#include "kfunc_call_test.lskel.h" #include "kfunc_call_test_subprog.skel.h" static void test_main(void) @@ -14,13 +14,13 @@ static void test_main(void) if (!ASSERT_OK_PTR(skel, "skel")) return; - prog_fd = bpf_program__fd(skel->progs.kfunc_call_test1); + prog_fd = skel->progs.kfunc_call_test1.prog_fd; err = bpf_prog_test_run(prog_fd, 1, &pkt_v4, sizeof(pkt_v4), NULL, NULL, (__u32 *)&retval, NULL); ASSERT_OK(err, "bpf_prog_test_run(test1)"); ASSERT_EQ(retval, 12, "test1-retval"); - prog_fd = bpf_program__fd(skel->progs.kfunc_call_test2); + prog_fd = skel->progs.kfunc_call_test2.prog_fd; err = bpf_prog_test_run(prog_fd, 1, &pkt_v4, sizeof(pkt_v4), NULL, NULL, (__u32 *)&retval, NULL); ASSERT_OK(err, "bpf_prog_test_run(test2)"); diff --git a/tools/testing/selftests/bpf/prog_tests/ksyms_btf.c b/tools/testing/selftests/bpf/prog_tests/ksyms_btf.c index b58b775d19f3..67bebd324147 100644 --- a/tools/testing/selftests/bpf/prog_tests/ksyms_btf.c +++ b/tools/testing/selftests/bpf/prog_tests/ksyms_btf.c @@ -87,8 +87,7 @@ void test_ksyms_btf(void) struct btf *btf; btf = libbpf_find_kernel_btf(); - if (CHECK(IS_ERR(btf), "btf_exists", "failed to load kernel BTF: %ld\n", - PTR_ERR(btf))) + if (!ASSERT_OK_PTR(btf, "btf_exists")) return; percpu_datasec = btf__find_by_name_kind(btf, ".data..percpu", diff --git a/tools/testing/selftests/bpf/prog_tests/ksyms_module.c b/tools/testing/selftests/bpf/prog_tests/ksyms_module.c index 4c232b456479..2cd5cded543f 100644 --- a/tools/testing/selftests/bpf/prog_tests/ksyms_module.c +++ b/tools/testing/selftests/bpf/prog_tests/ksyms_module.c @@ -4,7 +4,7 @@ #include <test_progs.h> #include <bpf/libbpf.h> #include <bpf/btf.h> -#include "test_ksyms_module.skel.h" +#include "test_ksyms_module.lskel.h" static int duration; diff --git a/tools/testing/selftests/bpf/prog_tests/link_pinning.c b/tools/testing/selftests/bpf/prog_tests/link_pinning.c index a743288cf384..6fc97c45f71e 100644 --- a/tools/testing/selftests/bpf/prog_tests/link_pinning.c +++ b/tools/testing/selftests/bpf/prog_tests/link_pinning.c @@ -17,7 +17,7 @@ void test_link_pinning_subtest(struct bpf_program *prog, int err, i; link = bpf_program__attach(prog); - if (CHECK(IS_ERR(link), "link_attach", "err: %ld\n", PTR_ERR(link))) + if (!ASSERT_OK_PTR(link, "link_attach")) goto cleanup; bss->in = 1; @@ -51,7 +51,7 @@ void test_link_pinning_subtest(struct bpf_program *prog, /* re-open link from BPFFS */ link = bpf_link__open(link_pin_path); - if (CHECK(IS_ERR(link), "link_open", "err: %ld\n", PTR_ERR(link))) + if (!ASSERT_OK_PTR(link, "link_open")) goto cleanup; CHECK(strcmp(link_pin_path, bpf_link__pin_path(link)), "pin_path2", @@ -84,8 +84,7 @@ void test_link_pinning_subtest(struct bpf_program *prog, CHECK(i == 10000, "link_attached", "got to iteration #%d\n", i); cleanup: - if (!IS_ERR(link)) - bpf_link__destroy(link); + bpf_link__destroy(link); } void test_link_pinning(void) diff --git a/tools/testing/selftests/bpf/prog_tests/lookup_and_delete.c b/tools/testing/selftests/bpf/prog_tests/lookup_and_delete.c new file mode 100644 index 000000000000..beebfa9730e1 --- /dev/null +++ b/tools/testing/selftests/bpf/prog_tests/lookup_and_delete.c @@ -0,0 +1,288 @@ +// SPDX-License-Identifier: GPL-2.0-only + +#include <test_progs.h> +#include "test_lookup_and_delete.skel.h" + +#define START_VALUE 1234 +#define NEW_VALUE 4321 +#define MAX_ENTRIES 2 + +static int duration; +static int nr_cpus; + +static int fill_values(int map_fd) +{ + __u64 key, value = START_VALUE; + int err; + + for (key = 1; key < MAX_ENTRIES + 1; key++) { + err = bpf_map_update_elem(map_fd, &key, &value, BPF_NOEXIST); + if (!ASSERT_OK(err, "bpf_map_update_elem")) + return -1; + } + + return 0; +} + +static int fill_values_percpu(int map_fd) +{ + __u64 key, value[nr_cpus]; + int i, err; + + for (i = 0; i < nr_cpus; i++) + value[i] = START_VALUE; + + for (key = 1; key < MAX_ENTRIES + 1; key++) { + err = bpf_map_update_elem(map_fd, &key, value, BPF_NOEXIST); + if (!ASSERT_OK(err, "bpf_map_update_elem")) + return -1; + } + + return 0; +} + +static struct test_lookup_and_delete *setup_prog(enum bpf_map_type map_type, + int *map_fd) +{ + struct test_lookup_and_delete *skel; + int err; + + skel = test_lookup_and_delete__open(); + if (!ASSERT_OK_PTR(skel, "test_lookup_and_delete__open")) + return NULL; + + err = bpf_map__set_type(skel->maps.hash_map, map_type); + if (!ASSERT_OK(err, "bpf_map__set_type")) + goto cleanup; + + err = bpf_map__set_max_entries(skel->maps.hash_map, MAX_ENTRIES); + if (!ASSERT_OK(err, "bpf_map__set_max_entries")) + goto cleanup; + + err = test_lookup_and_delete__load(skel); + if (!ASSERT_OK(err, "test_lookup_and_delete__load")) + goto cleanup; + + *map_fd = bpf_map__fd(skel->maps.hash_map); + if (!ASSERT_GE(*map_fd, 0, "bpf_map__fd")) + goto cleanup; + + return skel; + +cleanup: + test_lookup_and_delete__destroy(skel); + return NULL; +} + +/* Triggers BPF program that updates map with given key and value */ +static int trigger_tp(struct test_lookup_and_delete *skel, __u64 key, + __u64 value) +{ + int err; + + skel->bss->set_pid = getpid(); + skel->bss->set_key = key; + skel->bss->set_value = value; + + err = test_lookup_and_delete__attach(skel); + if (!ASSERT_OK(err, "test_lookup_and_delete__attach")) + return -1; + + syscall(__NR_getpgid); + + test_lookup_and_delete__detach(skel); + + return 0; +} + +static void test_lookup_and_delete_hash(void) +{ + struct test_lookup_and_delete *skel; + __u64 key, value; + int map_fd, err; + + /* Setup program and fill the map. */ + skel = setup_prog(BPF_MAP_TYPE_HASH, &map_fd); + if (!ASSERT_OK_PTR(skel, "setup_prog")) + return; + + err = fill_values(map_fd); + if (!ASSERT_OK(err, "fill_values")) + goto cleanup; + + /* Lookup and delete element. */ + key = 1; + err = bpf_map_lookup_and_delete_elem(map_fd, &key, &value); + if (!ASSERT_OK(err, "bpf_map_lookup_and_delete_elem")) + goto cleanup; + + /* Fetched value should match the initially set value. */ + if (CHECK(value != START_VALUE, "bpf_map_lookup_and_delete_elem", + "unexpected value=%lld\n", value)) + goto cleanup; + + /* Check that the entry is non existent. */ + err = bpf_map_lookup_elem(map_fd, &key, &value); + if (!ASSERT_ERR(err, "bpf_map_lookup_elem")) + goto cleanup; + +cleanup: + test_lookup_and_delete__destroy(skel); +} + +static void test_lookup_and_delete_percpu_hash(void) +{ + struct test_lookup_and_delete *skel; + __u64 key, val, value[nr_cpus]; + int map_fd, err, i; + + /* Setup program and fill the map. */ + skel = setup_prog(BPF_MAP_TYPE_PERCPU_HASH, &map_fd); + if (!ASSERT_OK_PTR(skel, "setup_prog")) + return; + + err = fill_values_percpu(map_fd); + if (!ASSERT_OK(err, "fill_values_percpu")) + goto cleanup; + + /* Lookup and delete element. */ + key = 1; + err = bpf_map_lookup_and_delete_elem(map_fd, &key, value); + if (!ASSERT_OK(err, "bpf_map_lookup_and_delete_elem")) + goto cleanup; + + for (i = 0; i < nr_cpus; i++) { + val = value[i]; + + /* Fetched value should match the initially set value. */ + if (CHECK(val != START_VALUE, "map value", + "unexpected for cpu %d: %lld\n", i, val)) + goto cleanup; + } + + /* Check that the entry is non existent. */ + err = bpf_map_lookup_elem(map_fd, &key, value); + if (!ASSERT_ERR(err, "bpf_map_lookup_elem")) + goto cleanup; + +cleanup: + test_lookup_and_delete__destroy(skel); +} + +static void test_lookup_and_delete_lru_hash(void) +{ + struct test_lookup_and_delete *skel; + __u64 key, value; + int map_fd, err; + + /* Setup program and fill the LRU map. */ + skel = setup_prog(BPF_MAP_TYPE_LRU_HASH, &map_fd); + if (!ASSERT_OK_PTR(skel, "setup_prog")) + return; + + err = fill_values(map_fd); + if (!ASSERT_OK(err, "fill_values")) + goto cleanup; + + /* Insert new element at key=3, should reuse LRU element. */ + key = 3; + err = trigger_tp(skel, key, NEW_VALUE); + if (!ASSERT_OK(err, "trigger_tp")) + goto cleanup; + + /* Lookup and delete element 3. */ + err = bpf_map_lookup_and_delete_elem(map_fd, &key, &value); + if (!ASSERT_OK(err, "bpf_map_lookup_and_delete_elem")) + goto cleanup; + + /* Value should match the new value. */ + if (CHECK(value != NEW_VALUE, "bpf_map_lookup_and_delete_elem", + "unexpected value=%lld\n", value)) + goto cleanup; + + /* Check that entries 3 and 1 are non existent. */ + err = bpf_map_lookup_elem(map_fd, &key, &value); + if (!ASSERT_ERR(err, "bpf_map_lookup_elem")) + goto cleanup; + + key = 1; + err = bpf_map_lookup_elem(map_fd, &key, &value); + if (!ASSERT_ERR(err, "bpf_map_lookup_elem")) + goto cleanup; + +cleanup: + test_lookup_and_delete__destroy(skel); +} + +static void test_lookup_and_delete_lru_percpu_hash(void) +{ + struct test_lookup_and_delete *skel; + __u64 key, val, value[nr_cpus]; + int map_fd, err, i, cpucnt = 0; + + /* Setup program and fill the LRU map. */ + skel = setup_prog(BPF_MAP_TYPE_LRU_PERCPU_HASH, &map_fd); + if (!ASSERT_OK_PTR(skel, "setup_prog")) + return; + + err = fill_values_percpu(map_fd); + if (!ASSERT_OK(err, "fill_values_percpu")) + goto cleanup; + + /* Insert new element at key=3, should reuse LRU element 1. */ + key = 3; + err = trigger_tp(skel, key, NEW_VALUE); + if (!ASSERT_OK(err, "trigger_tp")) + goto cleanup; + + /* Clean value. */ + for (i = 0; i < nr_cpus; i++) + value[i] = 0; + + /* Lookup and delete element 3. */ + err = bpf_map_lookup_and_delete_elem(map_fd, &key, value); + if (!ASSERT_OK(err, "bpf_map_lookup_and_delete_elem")) { + goto cleanup; + } + + /* Check if only one CPU has set the value. */ + for (i = 0; i < nr_cpus; i++) { + val = value[i]; + if (val) { + if (CHECK(val != NEW_VALUE, "map value", + "unexpected for cpu %d: %lld\n", i, val)) + goto cleanup; + cpucnt++; + } + } + if (CHECK(cpucnt != 1, "map value", "set for %d CPUs instead of 1!\n", + cpucnt)) + goto cleanup; + + /* Check that entries 3 and 1 are non existent. */ + err = bpf_map_lookup_elem(map_fd, &key, &value); + if (!ASSERT_ERR(err, "bpf_map_lookup_elem")) + goto cleanup; + + key = 1; + err = bpf_map_lookup_elem(map_fd, &key, &value); + if (!ASSERT_ERR(err, "bpf_map_lookup_elem")) + goto cleanup; + +cleanup: + test_lookup_and_delete__destroy(skel); +} + +void test_lookup_and_delete(void) +{ + nr_cpus = bpf_num_possible_cpus(); + + if (test__start_subtest("lookup_and_delete")) + test_lookup_and_delete_hash(); + if (test__start_subtest("lookup_and_delete_percpu")) + test_lookup_and_delete_percpu_hash(); + if (test__start_subtest("lookup_and_delete_lru")) + test_lookup_and_delete_lru_hash(); + if (test__start_subtest("lookup_and_delete_lru_percpu")) + test_lookup_and_delete_lru_percpu_hash(); +} diff --git a/tools/testing/selftests/bpf/prog_tests/migrate_reuseport.c b/tools/testing/selftests/bpf/prog_tests/migrate_reuseport.c new file mode 100644 index 000000000000..59adb4715394 --- /dev/null +++ b/tools/testing/selftests/bpf/prog_tests/migrate_reuseport.c @@ -0,0 +1,559 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Check if we can migrate child sockets. + * + * 1. call listen() for 4 server sockets. + * 2. call connect() for 25 client sockets. + * 3. call listen() for 1 server socket. (migration target) + * 4. update a map to migrate all child sockets + * to the last server socket (migrate_map[cookie] = 4) + * 5. call shutdown() for first 4 server sockets + * and migrate the requests in the accept queue + * to the last server socket. + * 6. call listen() for the second server socket. + * 7. call shutdown() for the last server + * and migrate the requests in the accept queue + * to the second server socket. + * 8. call listen() for the last server. + * 9. call shutdown() for the second server + * and migrate the requests in the accept queue + * to the last server socket. + * 10. call accept() for the last server socket. + * + * Author: Kuniyuki Iwashima <kuniyu@amazon.co.jp> + */ + +#include <bpf/bpf.h> +#include <bpf/libbpf.h> + +#include "test_progs.h" +#include "test_migrate_reuseport.skel.h" +#include "network_helpers.h" + +#ifndef TCP_FASTOPEN_CONNECT +#define TCP_FASTOPEN_CONNECT 30 +#endif + +#define IFINDEX_LO 1 + +#define NR_SERVERS 5 +#define NR_CLIENTS (NR_SERVERS * 5) +#define MIGRATED_TO (NR_SERVERS - 1) + +/* fastopenq->max_qlen and sk->sk_max_ack_backlog */ +#define QLEN (NR_CLIENTS * 5) + +#define MSG "Hello World\0" +#define MSGLEN 12 + +static struct migrate_reuseport_test_case { + const char *name; + __s64 servers[NR_SERVERS]; + __s64 clients[NR_CLIENTS]; + struct sockaddr_storage addr; + socklen_t addrlen; + int family; + int state; + bool drop_ack; + bool expire_synack_timer; + bool fastopen; + struct bpf_link *link; +} test_cases[] = { + { + .name = "IPv4 TCP_ESTABLISHED inet_csk_listen_stop", + .family = AF_INET, + .state = BPF_TCP_ESTABLISHED, + .drop_ack = false, + .expire_synack_timer = false, + .fastopen = false, + }, + { + .name = "IPv4 TCP_SYN_RECV inet_csk_listen_stop", + .family = AF_INET, + .state = BPF_TCP_SYN_RECV, + .drop_ack = true, + .expire_synack_timer = false, + .fastopen = true, + }, + { + .name = "IPv4 TCP_NEW_SYN_RECV reqsk_timer_handler", + .family = AF_INET, + .state = BPF_TCP_NEW_SYN_RECV, + .drop_ack = true, + .expire_synack_timer = true, + .fastopen = false, + }, + { + .name = "IPv4 TCP_NEW_SYN_RECV inet_csk_complete_hashdance", + .family = AF_INET, + .state = BPF_TCP_NEW_SYN_RECV, + .drop_ack = true, + .expire_synack_timer = false, + .fastopen = false, + }, + { + .name = "IPv6 TCP_ESTABLISHED inet_csk_listen_stop", + .family = AF_INET6, + .state = BPF_TCP_ESTABLISHED, + .drop_ack = false, + .expire_synack_timer = false, + .fastopen = false, + }, + { + .name = "IPv6 TCP_SYN_RECV inet_csk_listen_stop", + .family = AF_INET6, + .state = BPF_TCP_SYN_RECV, + .drop_ack = true, + .expire_synack_timer = false, + .fastopen = true, + }, + { + .name = "IPv6 TCP_NEW_SYN_RECV reqsk_timer_handler", + .family = AF_INET6, + .state = BPF_TCP_NEW_SYN_RECV, + .drop_ack = true, + .expire_synack_timer = true, + .fastopen = false, + }, + { + .name = "IPv6 TCP_NEW_SYN_RECV inet_csk_complete_hashdance", + .family = AF_INET6, + .state = BPF_TCP_NEW_SYN_RECV, + .drop_ack = true, + .expire_synack_timer = false, + .fastopen = false, + } +}; + +static void init_fds(__s64 fds[], int len) +{ + int i; + + for (i = 0; i < len; i++) + fds[i] = -1; +} + +static void close_fds(__s64 fds[], int len) +{ + int i; + + for (i = 0; i < len; i++) { + if (fds[i] != -1) { + close(fds[i]); + fds[i] = -1; + } + } +} + +static int setup_fastopen(char *buf, int size, int *saved_len, bool restore) +{ + int err = 0, fd, len; + + fd = open("/proc/sys/net/ipv4/tcp_fastopen", O_RDWR); + if (!ASSERT_NEQ(fd, -1, "open")) + return -1; + + if (restore) { + len = write(fd, buf, *saved_len); + if (!ASSERT_EQ(len, *saved_len, "write - restore")) + err = -1; + } else { + *saved_len = read(fd, buf, size); + if (!ASSERT_GE(*saved_len, 1, "read")) { + err = -1; + goto close; + } + + err = lseek(fd, 0, SEEK_SET); + if (!ASSERT_OK(err, "lseek")) + goto close; + + /* (TFO_CLIENT_ENABLE | TFO_SERVER_ENABLE | + * TFO_CLIENT_NO_COOKIE | TFO_SERVER_COOKIE_NOT_REQD) + */ + len = write(fd, "519", 3); + if (!ASSERT_EQ(len, 3, "write - setup")) + err = -1; + } + +close: + close(fd); + + return err; +} + +static int drop_ack(struct migrate_reuseport_test_case *test_case, + struct test_migrate_reuseport *skel) +{ + if (test_case->family == AF_INET) + skel->bss->server_port = ((struct sockaddr_in *) + &test_case->addr)->sin_port; + else + skel->bss->server_port = ((struct sockaddr_in6 *) + &test_case->addr)->sin6_port; + + test_case->link = bpf_program__attach_xdp(skel->progs.drop_ack, + IFINDEX_LO); + if (!ASSERT_OK_PTR(test_case->link, "bpf_program__attach_xdp")) + return -1; + + return 0; +} + +static int pass_ack(struct migrate_reuseport_test_case *test_case) +{ + int err; + + err = bpf_link__detach(test_case->link); + if (!ASSERT_OK(err, "bpf_link__detach")) + return -1; + + test_case->link = NULL; + + return 0; +} + +static int start_servers(struct migrate_reuseport_test_case *test_case, + struct test_migrate_reuseport *skel) +{ + int i, err, prog_fd, reuseport = 1, qlen = QLEN; + + prog_fd = bpf_program__fd(skel->progs.migrate_reuseport); + + make_sockaddr(test_case->family, + test_case->family == AF_INET ? "127.0.0.1" : "::1", 0, + &test_case->addr, &test_case->addrlen); + + for (i = 0; i < NR_SERVERS; i++) { + test_case->servers[i] = socket(test_case->family, SOCK_STREAM, + IPPROTO_TCP); + if (!ASSERT_NEQ(test_case->servers[i], -1, "socket")) + return -1; + + err = setsockopt(test_case->servers[i], SOL_SOCKET, + SO_REUSEPORT, &reuseport, sizeof(reuseport)); + if (!ASSERT_OK(err, "setsockopt - SO_REUSEPORT")) + return -1; + + err = bind(test_case->servers[i], + (struct sockaddr *)&test_case->addr, + test_case->addrlen); + if (!ASSERT_OK(err, "bind")) + return -1; + + if (i == 0) { + err = setsockopt(test_case->servers[i], SOL_SOCKET, + SO_ATTACH_REUSEPORT_EBPF, + &prog_fd, sizeof(prog_fd)); + if (!ASSERT_OK(err, + "setsockopt - SO_ATTACH_REUSEPORT_EBPF")) + return -1; + + err = getsockname(test_case->servers[i], + (struct sockaddr *)&test_case->addr, + &test_case->addrlen); + if (!ASSERT_OK(err, "getsockname")) + return -1; + } + + if (test_case->fastopen) { + err = setsockopt(test_case->servers[i], + SOL_TCP, TCP_FASTOPEN, + &qlen, sizeof(qlen)); + if (!ASSERT_OK(err, "setsockopt - TCP_FASTOPEN")) + return -1; + } + + /* All requests will be tied to the first four listeners */ + if (i != MIGRATED_TO) { + err = listen(test_case->servers[i], qlen); + if (!ASSERT_OK(err, "listen")) + return -1; + } + } + + return 0; +} + +static int start_clients(struct migrate_reuseport_test_case *test_case) +{ + char buf[MSGLEN] = MSG; + int i, err; + + for (i = 0; i < NR_CLIENTS; i++) { + test_case->clients[i] = socket(test_case->family, SOCK_STREAM, + IPPROTO_TCP); + if (!ASSERT_NEQ(test_case->clients[i], -1, "socket")) + return -1; + + /* The attached XDP program drops only the final ACK, so + * clients will transition to TCP_ESTABLISHED immediately. + */ + err = settimeo(test_case->clients[i], 100); + if (!ASSERT_OK(err, "settimeo")) + return -1; + + if (test_case->fastopen) { + int fastopen = 1; + + err = setsockopt(test_case->clients[i], IPPROTO_TCP, + TCP_FASTOPEN_CONNECT, &fastopen, + sizeof(fastopen)); + if (!ASSERT_OK(err, + "setsockopt - TCP_FASTOPEN_CONNECT")) + return -1; + } + + err = connect(test_case->clients[i], + (struct sockaddr *)&test_case->addr, + test_case->addrlen); + if (!ASSERT_OK(err, "connect")) + return -1; + + err = write(test_case->clients[i], buf, MSGLEN); + if (!ASSERT_EQ(err, MSGLEN, "write")) + return -1; + } + + return 0; +} + +static int update_maps(struct migrate_reuseport_test_case *test_case, + struct test_migrate_reuseport *skel) +{ + int i, err, migrated_to = MIGRATED_TO; + int reuseport_map_fd, migrate_map_fd; + __u64 value; + + reuseport_map_fd = bpf_map__fd(skel->maps.reuseport_map); + migrate_map_fd = bpf_map__fd(skel->maps.migrate_map); + + for (i = 0; i < NR_SERVERS; i++) { + value = (__u64)test_case->servers[i]; + err = bpf_map_update_elem(reuseport_map_fd, &i, &value, + BPF_NOEXIST); + if (!ASSERT_OK(err, "bpf_map_update_elem - reuseport_map")) + return -1; + + err = bpf_map_lookup_elem(reuseport_map_fd, &i, &value); + if (!ASSERT_OK(err, "bpf_map_lookup_elem - reuseport_map")) + return -1; + + err = bpf_map_update_elem(migrate_map_fd, &value, &migrated_to, + BPF_NOEXIST); + if (!ASSERT_OK(err, "bpf_map_update_elem - migrate_map")) + return -1; + } + + return 0; +} + +static int migrate_dance(struct migrate_reuseport_test_case *test_case) +{ + int i, err; + + /* Migrate TCP_ESTABLISHED and TCP_SYN_RECV requests + * to the last listener based on eBPF. + */ + for (i = 0; i < MIGRATED_TO; i++) { + err = shutdown(test_case->servers[i], SHUT_RDWR); + if (!ASSERT_OK(err, "shutdown")) + return -1; + } + + /* No dance for TCP_NEW_SYN_RECV to migrate based on eBPF */ + if (test_case->state == BPF_TCP_NEW_SYN_RECV) + return 0; + + /* Note that we use the second listener instead of the + * first one here. + * + * The fist listener is bind()ed with port 0 and, + * SOCK_BINDPORT_LOCK is not set to sk_userlocks, so + * calling listen() again will bind() the first listener + * on a new ephemeral port and detach it from the existing + * reuseport group. (See: __inet_bind(), tcp_set_state()) + * + * OTOH, the second one is bind()ed with a specific port, + * and SOCK_BINDPORT_LOCK is set. Thus, re-listen() will + * resurrect the listener on the existing reuseport group. + */ + err = listen(test_case->servers[1], QLEN); + if (!ASSERT_OK(err, "listen")) + return -1; + + /* Migrate from the last listener to the second one. + * + * All listeners were detached out of the reuseport_map, + * so migration will be done by kernel random pick from here. + */ + err = shutdown(test_case->servers[MIGRATED_TO], SHUT_RDWR); + if (!ASSERT_OK(err, "shutdown")) + return -1; + + /* Back to the existing reuseport group */ + err = listen(test_case->servers[MIGRATED_TO], QLEN); + if (!ASSERT_OK(err, "listen")) + return -1; + + /* Migrate back to the last one from the second one */ + err = shutdown(test_case->servers[1], SHUT_RDWR); + if (!ASSERT_OK(err, "shutdown")) + return -1; + + return 0; +} + +static void count_requests(struct migrate_reuseport_test_case *test_case, + struct test_migrate_reuseport *skel) +{ + struct sockaddr_storage addr; + socklen_t len = sizeof(addr); + int err, cnt = 0, client; + char buf[MSGLEN]; + + err = settimeo(test_case->servers[MIGRATED_TO], 4000); + if (!ASSERT_OK(err, "settimeo")) + goto out; + + for (; cnt < NR_CLIENTS; cnt++) { + client = accept(test_case->servers[MIGRATED_TO], + (struct sockaddr *)&addr, &len); + if (!ASSERT_NEQ(client, -1, "accept")) + goto out; + + memset(buf, 0, MSGLEN); + read(client, &buf, MSGLEN); + close(client); + + if (!ASSERT_STREQ(buf, MSG, "read")) + goto out; + } + +out: + ASSERT_EQ(cnt, NR_CLIENTS, "count in userspace"); + + switch (test_case->state) { + case BPF_TCP_ESTABLISHED: + cnt = skel->bss->migrated_at_close; + break; + case BPF_TCP_SYN_RECV: + cnt = skel->bss->migrated_at_close_fastopen; + break; + case BPF_TCP_NEW_SYN_RECV: + if (test_case->expire_synack_timer) + cnt = skel->bss->migrated_at_send_synack; + else + cnt = skel->bss->migrated_at_recv_ack; + break; + default: + cnt = 0; + } + + ASSERT_EQ(cnt, NR_CLIENTS, "count in BPF prog"); +} + +static void run_test(struct migrate_reuseport_test_case *test_case, + struct test_migrate_reuseport *skel) +{ + int err, saved_len; + char buf[16]; + + skel->bss->migrated_at_close = 0; + skel->bss->migrated_at_close_fastopen = 0; + skel->bss->migrated_at_send_synack = 0; + skel->bss->migrated_at_recv_ack = 0; + + init_fds(test_case->servers, NR_SERVERS); + init_fds(test_case->clients, NR_CLIENTS); + + if (test_case->fastopen) { + memset(buf, 0, sizeof(buf)); + + err = setup_fastopen(buf, sizeof(buf), &saved_len, false); + if (!ASSERT_OK(err, "setup_fastopen - setup")) + return; + } + + err = start_servers(test_case, skel); + if (!ASSERT_OK(err, "start_servers")) + goto close_servers; + + if (test_case->drop_ack) { + /* Drop the final ACK of the 3-way handshake and stick the + * in-flight requests on TCP_SYN_RECV or TCP_NEW_SYN_RECV. + */ + err = drop_ack(test_case, skel); + if (!ASSERT_OK(err, "drop_ack")) + goto close_servers; + } + + /* Tie requests to the first four listners */ + err = start_clients(test_case); + if (!ASSERT_OK(err, "start_clients")) + goto close_clients; + + err = listen(test_case->servers[MIGRATED_TO], QLEN); + if (!ASSERT_OK(err, "listen")) + goto close_clients; + + err = update_maps(test_case, skel); + if (!ASSERT_OK(err, "fill_maps")) + goto close_clients; + + /* Migrate the requests in the accept queue only. + * TCP_NEW_SYN_RECV requests are not migrated at this point. + */ + err = migrate_dance(test_case); + if (!ASSERT_OK(err, "migrate_dance")) + goto close_clients; + + if (test_case->expire_synack_timer) { + /* Wait for SYN+ACK timers to expire so that + * reqsk_timer_handler() migrates TCP_NEW_SYN_RECV requests. + */ + sleep(1); + } + + if (test_case->link) { + /* Resume 3WHS and migrate TCP_NEW_SYN_RECV requests */ + err = pass_ack(test_case); + if (!ASSERT_OK(err, "pass_ack")) + goto close_clients; + } + + count_requests(test_case, skel); + +close_clients: + close_fds(test_case->clients, NR_CLIENTS); + + if (test_case->link) { + err = pass_ack(test_case); + ASSERT_OK(err, "pass_ack - clean up"); + } + +close_servers: + close_fds(test_case->servers, NR_SERVERS); + + if (test_case->fastopen) { + err = setup_fastopen(buf, sizeof(buf), &saved_len, true); + ASSERT_OK(err, "setup_fastopen - restore"); + } +} + +void test_migrate_reuseport(void) +{ + struct test_migrate_reuseport *skel; + int i; + + skel = test_migrate_reuseport__open_and_load(); + if (!ASSERT_OK_PTR(skel, "open_and_load")) + return; + + for (i = 0; i < ARRAY_SIZE(test_cases); i++) { + test__start_subtest(test_cases[i].name); + run_test(&test_cases[i], skel); + } + + test_migrate_reuseport__destroy(skel); +} diff --git a/tools/testing/selftests/bpf/prog_tests/obj_name.c b/tools/testing/selftests/bpf/prog_tests/obj_name.c index e178416bddad..6194b776a28b 100644 --- a/tools/testing/selftests/bpf/prog_tests/obj_name.c +++ b/tools/testing/selftests/bpf/prog_tests/obj_name.c @@ -38,13 +38,13 @@ void test_obj_name(void) fd = syscall(__NR_bpf, BPF_PROG_LOAD, &attr, sizeof(attr)); CHECK((tests[i].success && fd < 0) || - (!tests[i].success && fd != -1) || + (!tests[i].success && fd >= 0) || (!tests[i].success && errno != tests[i].expected_errno), "check-bpf-prog-name", "fd %d(%d) errno %d(%d)\n", fd, tests[i].success, errno, tests[i].expected_errno); - if (fd != -1) + if (fd >= 0) close(fd); /* test different attr.map_name during BPF_MAP_CREATE */ @@ -59,13 +59,13 @@ void test_obj_name(void) memcpy(attr.map_name, tests[i].name, ncopy); fd = syscall(__NR_bpf, BPF_MAP_CREATE, &attr, sizeof(attr)); CHECK((tests[i].success && fd < 0) || - (!tests[i].success && fd != -1) || + (!tests[i].success && fd >= 0) || (!tests[i].success && errno != tests[i].expected_errno), "check-bpf-map-name", "fd %d(%d) errno %d(%d)\n", fd, tests[i].success, errno, tests[i].expected_errno); - if (fd != -1) + if (fd >= 0) close(fd); } } diff --git a/tools/testing/selftests/bpf/prog_tests/perf_branches.c b/tools/testing/selftests/bpf/prog_tests/perf_branches.c index e35c444902a7..12c4f45cee1a 100644 --- a/tools/testing/selftests/bpf/prog_tests/perf_branches.c +++ b/tools/testing/selftests/bpf/prog_tests/perf_branches.c @@ -74,7 +74,7 @@ static void test_perf_branches_common(int perf_fd, /* attach perf_event */ link = bpf_program__attach_perf_event(skel->progs.perf_branches, perf_fd); - if (CHECK(IS_ERR(link), "attach_perf_event", "err %ld\n", PTR_ERR(link))) + if (!ASSERT_OK_PTR(link, "attach_perf_event")) goto out_destroy_skel; /* generate some branches on cpu 0 */ @@ -119,7 +119,7 @@ static void test_perf_branches_hw(void) * Some setups don't support branch records (virtual machines, !x86), * so skip test in this case. */ - if (pfd == -1) { + if (pfd < 0) { if (errno == ENOENT || errno == EOPNOTSUPP) { printf("%s:SKIP:no PERF_SAMPLE_BRANCH_STACK\n", __func__); diff --git a/tools/testing/selftests/bpf/prog_tests/perf_buffer.c b/tools/testing/selftests/bpf/prog_tests/perf_buffer.c index ca9f0895ec84..6490e9673002 100644 --- a/tools/testing/selftests/bpf/prog_tests/perf_buffer.c +++ b/tools/testing/selftests/bpf/prog_tests/perf_buffer.c @@ -80,7 +80,7 @@ void test_perf_buffer(void) pb_opts.sample_cb = on_sample; pb_opts.ctx = &cpu_seen; pb = perf_buffer__new(bpf_map__fd(skel->maps.perf_buf_map), 1, &pb_opts); - if (CHECK(IS_ERR(pb), "perf_buf__new", "err %ld\n", PTR_ERR(pb))) + if (!ASSERT_OK_PTR(pb, "perf_buf__new")) goto out_close; CHECK(perf_buffer__epoll_fd(pb) < 0, "epoll_fd", diff --git a/tools/testing/selftests/bpf/prog_tests/perf_event_stackmap.c b/tools/testing/selftests/bpf/prog_tests/perf_event_stackmap.c index 72c3690844fb..33144c9432ae 100644 --- a/tools/testing/selftests/bpf/prog_tests/perf_event_stackmap.c +++ b/tools/testing/selftests/bpf/prog_tests/perf_event_stackmap.c @@ -97,8 +97,7 @@ void test_perf_event_stackmap(void) skel->links.oncpu = bpf_program__attach_perf_event(skel->progs.oncpu, pmu_fd); - if (CHECK(IS_ERR(skel->links.oncpu), "attach_perf_event", - "err %ld\n", PTR_ERR(skel->links.oncpu))) { + if (!ASSERT_OK_PTR(skel->links.oncpu, "attach_perf_event")) { close(pmu_fd); goto cleanup; } diff --git a/tools/testing/selftests/bpf/prog_tests/probe_user.c b/tools/testing/selftests/bpf/prog_tests/probe_user.c index 7aecfd9e87d1..95bd12097358 100644 --- a/tools/testing/selftests/bpf/prog_tests/probe_user.c +++ b/tools/testing/selftests/bpf/prog_tests/probe_user.c @@ -15,7 +15,7 @@ void test_probe_user(void) static const int zero = 0; obj = bpf_object__open_file(obj_file, &opts); - if (CHECK(IS_ERR(obj), "obj_open_file", "err %ld\n", PTR_ERR(obj))) + if (!ASSERT_OK_PTR(obj, "obj_open_file")) return; kprobe_prog = bpf_object__find_program_by_title(obj, prog_name); @@ -33,11 +33,8 @@ void test_probe_user(void) goto cleanup; kprobe_link = bpf_program__attach(kprobe_prog); - if (CHECK(IS_ERR(kprobe_link), "attach_kprobe", - "err %ld\n", PTR_ERR(kprobe_link))) { - kprobe_link = NULL; + if (!ASSERT_OK_PTR(kprobe_link, "attach_kprobe")) goto cleanup; - } memset(&curr, 0, sizeof(curr)); in->sin_family = AF_INET; diff --git a/tools/testing/selftests/bpf/prog_tests/prog_run_xattr.c b/tools/testing/selftests/bpf/prog_tests/prog_run_xattr.c index 131d7f7eeb42..89fc98faf19e 100644 --- a/tools/testing/selftests/bpf/prog_tests/prog_run_xattr.c +++ b/tools/testing/selftests/bpf/prog_tests/prog_run_xattr.c @@ -46,7 +46,7 @@ void test_prog_run_xattr(void) tattr.prog_fd = bpf_program__fd(skel->progs.test_pkt_access); err = bpf_prog_test_run_xattr(&tattr); - CHECK_ATTR(err != -1 || errno != ENOSPC || tattr.retval, "run", + CHECK_ATTR(err >= 0 || errno != ENOSPC || tattr.retval, "run", "err %d errno %d retval %d\n", err, errno, tattr.retval); CHECK_ATTR(tattr.data_size_out != sizeof(pkt_v4), "data_size_out", @@ -78,6 +78,6 @@ void test_prog_run_xattr(void) cleanup: if (skel) test_pkt_access__destroy(skel); - if (stats_fd != -1) + if (stats_fd >= 0) close(stats_fd); } diff --git a/tools/testing/selftests/bpf/prog_tests/raw_tp_test_run.c b/tools/testing/selftests/bpf/prog_tests/raw_tp_test_run.c index c5fb191874ac..41720a62c4fa 100644 --- a/tools/testing/selftests/bpf/prog_tests/raw_tp_test_run.c +++ b/tools/testing/selftests/bpf/prog_tests/raw_tp_test_run.c @@ -77,7 +77,7 @@ void test_raw_tp_test_run(void) /* invalid cpu ID should fail with ENXIO */ opts.cpu = 0xffffffff; err = bpf_prog_test_run_opts(prog_fd, &opts); - CHECK(err != -1 || errno != ENXIO, + CHECK(err >= 0 || errno != ENXIO, "test_run_opts_fail", "should failed with ENXIO\n"); @@ -85,7 +85,7 @@ void test_raw_tp_test_run(void) opts.cpu = 1; opts.flags = 0; err = bpf_prog_test_run_opts(prog_fd, &opts); - CHECK(err != -1 || errno != EINVAL, + CHECK(err >= 0 || errno != EINVAL, "test_run_opts_fail", "should failed with EINVAL\n"); diff --git a/tools/testing/selftests/bpf/prog_tests/rdonly_maps.c b/tools/testing/selftests/bpf/prog_tests/rdonly_maps.c index 563e12120e77..5f9eaa3ab584 100644 --- a/tools/testing/selftests/bpf/prog_tests/rdonly_maps.c +++ b/tools/testing/selftests/bpf/prog_tests/rdonly_maps.c @@ -30,7 +30,7 @@ void test_rdonly_maps(void) struct bss bss; obj = bpf_object__open_file(file, NULL); - if (CHECK(IS_ERR(obj), "obj_open", "err %ld\n", PTR_ERR(obj))) + if (!ASSERT_OK_PTR(obj, "obj_open")) return; err = bpf_object__load(obj); @@ -58,11 +58,8 @@ void test_rdonly_maps(void) goto cleanup; link = bpf_program__attach_raw_tracepoint(prog, "sys_enter"); - if (CHECK(IS_ERR(link), "attach_prog", "prog '%s', err %ld\n", - t->prog_name, PTR_ERR(link))) { - link = NULL; + if (!ASSERT_OK_PTR(link, "attach_prog")) goto cleanup; - } /* trigger probe */ usleep(1); diff --git a/tools/testing/selftests/bpf/prog_tests/reference_tracking.c b/tools/testing/selftests/bpf/prog_tests/reference_tracking.c index ac1ee10cffd8..de2688166696 100644 --- a/tools/testing/selftests/bpf/prog_tests/reference_tracking.c +++ b/tools/testing/selftests/bpf/prog_tests/reference_tracking.c @@ -15,7 +15,7 @@ void test_reference_tracking(void) int err = 0; obj = bpf_object__open_file(file, &open_opts); - if (CHECK_FAIL(IS_ERR(obj))) + if (!ASSERT_OK_PTR(obj, "obj_open_file")) return; if (CHECK(strcmp(bpf_object__name(obj), obj_name), "obj_name", diff --git a/tools/testing/selftests/bpf/prog_tests/resolve_btfids.c b/tools/testing/selftests/bpf/prog_tests/resolve_btfids.c index d3c2de2c24d1..f62361306f6d 100644 --- a/tools/testing/selftests/bpf/prog_tests/resolve_btfids.c +++ b/tools/testing/selftests/bpf/prog_tests/resolve_btfids.c @@ -76,7 +76,7 @@ __resolve_symbol(struct btf *btf, int type_id) } for (i = 0; i < ARRAY_SIZE(test_symbols); i++) { - if (test_symbols[i].id != -1) + if (test_symbols[i].id >= 0) continue; if (BTF_INFO_KIND(type->info) != test_symbols[i].type) diff --git a/tools/testing/selftests/bpf/prog_tests/ringbuf.c b/tools/testing/selftests/bpf/prog_tests/ringbuf.c index de78617f6550..a01788090c31 100644 --- a/tools/testing/selftests/bpf/prog_tests/ringbuf.c +++ b/tools/testing/selftests/bpf/prog_tests/ringbuf.c @@ -12,7 +12,7 @@ #include <sys/sysinfo.h> #include <linux/perf_event.h> #include <linux/ring_buffer.h> -#include "test_ringbuf.skel.h" +#include "test_ringbuf.lskel.h" #define EDONE 7777 @@ -86,25 +86,70 @@ void test_ringbuf(void) const size_t rec_sz = BPF_RINGBUF_HDR_SZ + sizeof(struct sample); pthread_t thread; long bg_ret = -1; - int err, cnt; + int err, cnt, rb_fd; int page_size = getpagesize(); + void *mmap_ptr, *tmp_ptr; skel = test_ringbuf__open(); if (CHECK(!skel, "skel_open", "skeleton open failed\n")) return; - err = bpf_map__set_max_entries(skel->maps.ringbuf, page_size); - if (CHECK(err != 0, "bpf_map__set_max_entries", "bpf_map__set_max_entries failed\n")) - goto cleanup; + skel->maps.ringbuf.max_entries = page_size; err = test_ringbuf__load(skel); if (CHECK(err != 0, "skel_load", "skeleton load failed\n")) goto cleanup; + rb_fd = bpf_map__fd(skel->maps.ringbuf); + /* good read/write cons_pos */ + mmap_ptr = mmap(NULL, page_size, PROT_READ | PROT_WRITE, MAP_SHARED, rb_fd, 0); + ASSERT_OK_PTR(mmap_ptr, "rw_cons_pos"); + tmp_ptr = mremap(mmap_ptr, page_size, 2 * page_size, MREMAP_MAYMOVE); + if (!ASSERT_ERR_PTR(tmp_ptr, "rw_extend")) + goto cleanup; + ASSERT_ERR(mprotect(mmap_ptr, page_size, PROT_EXEC), "exec_cons_pos_protect"); + ASSERT_OK(munmap(mmap_ptr, page_size), "unmap_rw"); + + /* bad writeable prod_pos */ + mmap_ptr = mmap(NULL, page_size, PROT_WRITE, MAP_SHARED, rb_fd, page_size); + err = -errno; + ASSERT_ERR_PTR(mmap_ptr, "wr_prod_pos"); + ASSERT_EQ(err, -EPERM, "wr_prod_pos_err"); + + /* bad writeable data pages */ + mmap_ptr = mmap(NULL, page_size, PROT_WRITE, MAP_SHARED, rb_fd, 2 * page_size); + err = -errno; + ASSERT_ERR_PTR(mmap_ptr, "wr_data_page_one"); + ASSERT_EQ(err, -EPERM, "wr_data_page_one_err"); + mmap_ptr = mmap(NULL, page_size, PROT_WRITE, MAP_SHARED, rb_fd, 3 * page_size); + ASSERT_ERR_PTR(mmap_ptr, "wr_data_page_two"); + mmap_ptr = mmap(NULL, 2 * page_size, PROT_WRITE, MAP_SHARED, rb_fd, 2 * page_size); + ASSERT_ERR_PTR(mmap_ptr, "wr_data_page_all"); + + /* good read-only pages */ + mmap_ptr = mmap(NULL, 4 * page_size, PROT_READ, MAP_SHARED, rb_fd, 0); + if (!ASSERT_OK_PTR(mmap_ptr, "ro_prod_pos")) + goto cleanup; + + ASSERT_ERR(mprotect(mmap_ptr, 4 * page_size, PROT_WRITE), "write_protect"); + ASSERT_ERR(mprotect(mmap_ptr, 4 * page_size, PROT_EXEC), "exec_protect"); + ASSERT_ERR_PTR(mremap(mmap_ptr, 0, 4 * page_size, MREMAP_MAYMOVE), "ro_remap"); + ASSERT_OK(munmap(mmap_ptr, 4 * page_size), "unmap_ro"); + + /* good read-only pages with initial offset */ + mmap_ptr = mmap(NULL, page_size, PROT_READ, MAP_SHARED, rb_fd, page_size); + if (!ASSERT_OK_PTR(mmap_ptr, "ro_prod_pos")) + goto cleanup; + + ASSERT_ERR(mprotect(mmap_ptr, page_size, PROT_WRITE), "write_protect"); + ASSERT_ERR(mprotect(mmap_ptr, page_size, PROT_EXEC), "exec_protect"); + ASSERT_ERR_PTR(mremap(mmap_ptr, 0, 3 * page_size, MREMAP_MAYMOVE), "ro_remap"); + ASSERT_OK(munmap(mmap_ptr, page_size), "unmap_ro"); + /* only trigger BPF program for current process */ skel->bss->pid = getpid(); - ringbuf = ring_buffer__new(bpf_map__fd(skel->maps.ringbuf), + ringbuf = ring_buffer__new(skel->maps.ringbuf.map_fd, process_sample, NULL, NULL); if (CHECK(!ringbuf, "ringbuf_create", "failed to create ringbuf\n")) goto cleanup; diff --git a/tools/testing/selftests/bpf/prog_tests/ringbuf_multi.c b/tools/testing/selftests/bpf/prog_tests/ringbuf_multi.c index cef63e703924..167cd8a2edfd 100644 --- a/tools/testing/selftests/bpf/prog_tests/ringbuf_multi.c +++ b/tools/testing/selftests/bpf/prog_tests/ringbuf_multi.c @@ -63,7 +63,7 @@ void test_ringbuf_multi(void) goto cleanup; proto_fd = bpf_create_map(BPF_MAP_TYPE_RINGBUF, 0, 0, page_size, 0); - if (CHECK(proto_fd == -1, "bpf_create_map", "bpf_create_map failed\n")) + if (CHECK(proto_fd < 0, "bpf_create_map", "bpf_create_map failed\n")) goto cleanup; err = bpf_map__set_inner_map_fd(skel->maps.ringbuf_hash, proto_fd); diff --git a/tools/testing/selftests/bpf/prog_tests/select_reuseport.c b/tools/testing/selftests/bpf/prog_tests/select_reuseport.c index 821b4146b7b6..4efd337d6a3c 100644 --- a/tools/testing/selftests/bpf/prog_tests/select_reuseport.c +++ b/tools/testing/selftests/bpf/prog_tests/select_reuseport.c @@ -78,7 +78,7 @@ static int create_maps(enum bpf_map_type inner_type) attr.max_entries = REUSEPORT_ARRAY_SIZE; reuseport_array = bpf_create_map_xattr(&attr); - RET_ERR(reuseport_array == -1, "creating reuseport_array", + RET_ERR(reuseport_array < 0, "creating reuseport_array", "reuseport_array:%d errno:%d\n", reuseport_array, errno); /* Creating outer_map */ @@ -89,7 +89,7 @@ static int create_maps(enum bpf_map_type inner_type) attr.max_entries = 1; attr.inner_map_fd = reuseport_array; outer_map = bpf_create_map_xattr(&attr); - RET_ERR(outer_map == -1, "creating outer_map", + RET_ERR(outer_map < 0, "creating outer_map", "outer_map:%d errno:%d\n", outer_map, errno); return 0; @@ -102,8 +102,9 @@ static int prepare_bpf_obj(void) int err; obj = bpf_object__open("test_select_reuseport_kern.o"); - RET_ERR(IS_ERR_OR_NULL(obj), "open test_select_reuseport_kern.o", - "obj:%p PTR_ERR(obj):%ld\n", obj, PTR_ERR(obj)); + err = libbpf_get_error(obj); + RET_ERR(err, "open test_select_reuseport_kern.o", + "obj:%p PTR_ERR(obj):%d\n", obj, err); map = bpf_object__find_map_by_name(obj, "outer_map"); RET_ERR(!map, "find outer_map", "!map\n"); @@ -116,31 +117,31 @@ static int prepare_bpf_obj(void) prog = bpf_program__next(NULL, obj); RET_ERR(!prog, "get first bpf_program", "!prog\n"); select_by_skb_data_prog = bpf_program__fd(prog); - RET_ERR(select_by_skb_data_prog == -1, "get prog fd", + RET_ERR(select_by_skb_data_prog < 0, "get prog fd", "select_by_skb_data_prog:%d\n", select_by_skb_data_prog); map = bpf_object__find_map_by_name(obj, "result_map"); RET_ERR(!map, "find result_map", "!map\n"); result_map = bpf_map__fd(map); - RET_ERR(result_map == -1, "get result_map fd", + RET_ERR(result_map < 0, "get result_map fd", "result_map:%d\n", result_map); map = bpf_object__find_map_by_name(obj, "tmp_index_ovr_map"); RET_ERR(!map, "find tmp_index_ovr_map\n", "!map"); tmp_index_ovr_map = bpf_map__fd(map); - RET_ERR(tmp_index_ovr_map == -1, "get tmp_index_ovr_map fd", + RET_ERR(tmp_index_ovr_map < 0, "get tmp_index_ovr_map fd", "tmp_index_ovr_map:%d\n", tmp_index_ovr_map); map = bpf_object__find_map_by_name(obj, "linum_map"); RET_ERR(!map, "find linum_map", "!map\n"); linum_map = bpf_map__fd(map); - RET_ERR(linum_map == -1, "get linum_map fd", + RET_ERR(linum_map < 0, "get linum_map fd", "linum_map:%d\n", linum_map); map = bpf_object__find_map_by_name(obj, "data_check_map"); RET_ERR(!map, "find data_check_map", "!map\n"); data_check_map = bpf_map__fd(map); - RET_ERR(data_check_map == -1, "get data_check_map fd", + RET_ERR(data_check_map < 0, "get data_check_map fd", "data_check_map:%d\n", data_check_map); return 0; @@ -237,7 +238,7 @@ static long get_linum(void) int err; err = bpf_map_lookup_elem(linum_map, &index_zero, &linum); - RET_ERR(err == -1, "lookup_elem(linum_map)", "err:%d errno:%d\n", + RET_ERR(err < 0, "lookup_elem(linum_map)", "err:%d errno:%d\n", err, errno); return linum; @@ -254,11 +255,11 @@ static void check_data(int type, sa_family_t family, const struct cmd *cmd, addrlen = sizeof(cli_sa); err = getsockname(cli_fd, (struct sockaddr *)&cli_sa, &addrlen); - RET_IF(err == -1, "getsockname(cli_fd)", "err:%d errno:%d\n", + RET_IF(err < 0, "getsockname(cli_fd)", "err:%d errno:%d\n", err, errno); err = bpf_map_lookup_elem(data_check_map, &index_zero, &result); - RET_IF(err == -1, "lookup_elem(data_check_map)", "err:%d errno:%d\n", + RET_IF(err < 0, "lookup_elem(data_check_map)", "err:%d errno:%d\n", err, errno); if (type == SOCK_STREAM) { @@ -347,7 +348,7 @@ static void check_results(void) for (i = 0; i < NR_RESULTS; i++) { err = bpf_map_lookup_elem(result_map, &i, &results[i]); - RET_IF(err == -1, "lookup_elem(result_map)", + RET_IF(err < 0, "lookup_elem(result_map)", "i:%u err:%d errno:%d\n", i, err, errno); } @@ -524,12 +525,12 @@ static void test_syncookie(int type, sa_family_t family) */ err = bpf_map_update_elem(tmp_index_ovr_map, &index_zero, &tmp_index, BPF_ANY); - RET_IF(err == -1, "update_elem(tmp_index_ovr_map, 0, 1)", + RET_IF(err < 0, "update_elem(tmp_index_ovr_map, 0, 1)", "err:%d errno:%d\n", err, errno); do_test(type, family, &cmd, PASS); err = bpf_map_lookup_elem(tmp_index_ovr_map, &index_zero, &tmp_index); - RET_IF(err == -1 || tmp_index != -1, + RET_IF(err < 0 || tmp_index >= 0, "lookup_elem(tmp_index_ovr_map)", "err:%d errno:%d tmp_index:%d\n", err, errno, tmp_index); @@ -569,7 +570,7 @@ static void test_detach_bpf(int type, sa_family_t family) for (i = 0; i < NR_RESULTS; i++) { err = bpf_map_lookup_elem(result_map, &i, &tmp); - RET_IF(err == -1, "lookup_elem(result_map)", + RET_IF(err < 0, "lookup_elem(result_map)", "i:%u err:%d errno:%d\n", i, err, errno); nr_run_before += tmp; } @@ -584,7 +585,7 @@ static void test_detach_bpf(int type, sa_family_t family) for (i = 0; i < NR_RESULTS; i++) { err = bpf_map_lookup_elem(result_map, &i, &tmp); - RET_IF(err == -1, "lookup_elem(result_map)", + RET_IF(err < 0, "lookup_elem(result_map)", "i:%u err:%d errno:%d\n", i, err, errno); nr_run_after += tmp; } @@ -632,24 +633,24 @@ static void prepare_sk_fds(int type, sa_family_t family, bool inany) SO_ATTACH_REUSEPORT_EBPF, &select_by_skb_data_prog, sizeof(select_by_skb_data_prog)); - RET_IF(err == -1, "setsockopt(SO_ATTACH_REUEPORT_EBPF)", + RET_IF(err < 0, "setsockopt(SO_ATTACH_REUEPORT_EBPF)", "err:%d errno:%d\n", err, errno); } err = bind(sk_fds[i], (struct sockaddr *)&srv_sa, addrlen); - RET_IF(err == -1, "bind()", "sk_fds[%d] err:%d errno:%d\n", + RET_IF(err < 0, "bind()", "sk_fds[%d] err:%d errno:%d\n", i, err, errno); if (type == SOCK_STREAM) { err = listen(sk_fds[i], 10); - RET_IF(err == -1, "listen()", + RET_IF(err < 0, "listen()", "sk_fds[%d] err:%d errno:%d\n", i, err, errno); } err = bpf_map_update_elem(reuseport_array, &i, &sk_fds[i], BPF_NOEXIST); - RET_IF(err == -1, "update_elem(reuseport_array)", + RET_IF(err < 0, "update_elem(reuseport_array)", "sk_fds[%d] err:%d errno:%d\n", i, err, errno); if (i == first) { @@ -682,7 +683,7 @@ static void setup_per_test(int type, sa_family_t family, bool inany, prepare_sk_fds(type, family, inany); err = bpf_map_update_elem(tmp_index_ovr_map, &index_zero, &ovr, BPF_ANY); - RET_IF(err == -1, "update_elem(tmp_index_ovr_map, 0, -1)", + RET_IF(err < 0, "update_elem(tmp_index_ovr_map, 0, -1)", "err:%d errno:%d\n", err, errno); /* Install reuseport_array to outer_map? */ @@ -691,7 +692,7 @@ static void setup_per_test(int type, sa_family_t family, bool inany, err = bpf_map_update_elem(outer_map, &index_zero, &reuseport_array, BPF_ANY); - RET_IF(err == -1, "update_elem(outer_map, 0, reuseport_array)", + RET_IF(err < 0, "update_elem(outer_map, 0, reuseport_array)", "err:%d errno:%d\n", err, errno); } @@ -720,18 +721,18 @@ static void cleanup_per_test(bool no_inner_map) return; err = bpf_map_delete_elem(outer_map, &index_zero); - RET_IF(err == -1, "delete_elem(outer_map)", + RET_IF(err < 0, "delete_elem(outer_map)", "err:%d errno:%d\n", err, errno); } static void cleanup(void) { - if (outer_map != -1) { + if (outer_map >= 0) { close(outer_map); outer_map = -1; } - if (reuseport_array != -1) { + if (reuseport_array >= 0) { close(reuseport_array); reuseport_array = -1; } diff --git a/tools/testing/selftests/bpf/prog_tests/send_signal.c b/tools/testing/selftests/bpf/prog_tests/send_signal.c index 7043e6ded0e6..023cc532992d 100644 --- a/tools/testing/selftests/bpf/prog_tests/send_signal.c +++ b/tools/testing/selftests/bpf/prog_tests/send_signal.c @@ -2,7 +2,7 @@ #include <test_progs.h> #include "test_send_signal_kern.skel.h" -static volatile int sigusr1_received = 0; +int sigusr1_received = 0; static void sigusr1_handler(int signum) { @@ -91,8 +91,7 @@ static void test_send_signal_common(struct perf_event_attr *attr, skel->links.send_signal_perf = bpf_program__attach_perf_event(skel->progs.send_signal_perf, pmu_fd); - if (CHECK(IS_ERR(skel->links.send_signal_perf), "attach_perf_event", - "err %ld\n", PTR_ERR(skel->links.send_signal_perf))) + if (!ASSERT_OK_PTR(skel->links.send_signal_perf, "attach_perf_event")) goto disable_pmu; } diff --git a/tools/testing/selftests/bpf/prog_tests/sk_lookup.c b/tools/testing/selftests/bpf/prog_tests/sk_lookup.c index 45c82db3c58c..aee41547e7f4 100644 --- a/tools/testing/selftests/bpf/prog_tests/sk_lookup.c +++ b/tools/testing/selftests/bpf/prog_tests/sk_lookup.c @@ -480,7 +480,7 @@ static struct bpf_link *attach_lookup_prog(struct bpf_program *prog) } link = bpf_program__attach_netns(prog, net_fd); - if (CHECK(IS_ERR(link), "bpf_program__attach_netns", "failed\n")) { + if (!ASSERT_OK_PTR(link, "bpf_program__attach_netns")) { errno = -PTR_ERR(link); log_err("failed to attach program '%s' to netns", bpf_program__name(prog)); diff --git a/tools/testing/selftests/bpf/prog_tests/skeleton.c b/tools/testing/selftests/bpf/prog_tests/skeleton.c index fe87b77af459..f6f130c99b8c 100644 --- a/tools/testing/selftests/bpf/prog_tests/skeleton.c +++ b/tools/testing/selftests/bpf/prog_tests/skeleton.c @@ -82,10 +82,8 @@ void test_skeleton(void) CHECK(data->out2 != 2, "res2", "got %lld != exp %d\n", data->out2, 2); CHECK(bss->out3 != 3, "res3", "got %d != exp %d\n", (int)bss->out3, 3); CHECK(bss->out4 != 4, "res4", "got %lld != exp %d\n", bss->out4, 4); - CHECK(bss->handler_out5.a != 5, "res5", "got %d != exp %d\n", - bss->handler_out5.a, 5); - CHECK(bss->handler_out5.b != 6, "res6", "got %lld != exp %d\n", - bss->handler_out5.b, 6); + CHECK(bss->out5.a != 5, "res5", "got %d != exp %d\n", bss->out5.a, 5); + CHECK(bss->out5.b != 6, "res6", "got %lld != exp %d\n", bss->out5.b, 6); CHECK(bss->out6 != 14, "res7", "got %d != exp %d\n", bss->out6, 14); CHECK(bss->bpf_syscall != kcfg->CONFIG_BPF_SYSCALL, "ext1", diff --git a/tools/testing/selftests/bpf/prog_tests/sock_fields.c b/tools/testing/selftests/bpf/prog_tests/sock_fields.c index af87118e748e..577d619fb07e 100644 --- a/tools/testing/selftests/bpf/prog_tests/sock_fields.c +++ b/tools/testing/selftests/bpf/prog_tests/sock_fields.c @@ -97,12 +97,12 @@ static void check_result(void) err = bpf_map_lookup_elem(linum_map_fd, &egress_linum_idx, &egress_linum); - CHECK(err == -1, "bpf_map_lookup_elem(linum_map_fd)", + CHECK(err < 0, "bpf_map_lookup_elem(linum_map_fd)", "err:%d errno:%d\n", err, errno); err = bpf_map_lookup_elem(linum_map_fd, &ingress_linum_idx, &ingress_linum); - CHECK(err == -1, "bpf_map_lookup_elem(linum_map_fd)", + CHECK(err < 0, "bpf_map_lookup_elem(linum_map_fd)", "err:%d errno:%d\n", err, errno); memcpy(&srv_sk, &skel->bss->srv_sk, sizeof(srv_sk)); @@ -355,14 +355,12 @@ void test_sock_fields(void) egress_link = bpf_program__attach_cgroup(skel->progs.egress_read_sock_fields, child_cg_fd); - if (CHECK(IS_ERR(egress_link), "attach_cgroup(egress)", "err:%ld\n", - PTR_ERR(egress_link))) + if (!ASSERT_OK_PTR(egress_link, "attach_cgroup(egress)")) goto done; ingress_link = bpf_program__attach_cgroup(skel->progs.ingress_read_sock_fields, child_cg_fd); - if (CHECK(IS_ERR(ingress_link), "attach_cgroup(ingress)", "err:%ld\n", - PTR_ERR(ingress_link))) + if (!ASSERT_OK_PTR(ingress_link, "attach_cgroup(ingress)")) goto done; linum_map_fd = bpf_map__fd(skel->maps.linum_map); @@ -375,8 +373,8 @@ done: bpf_link__destroy(egress_link); bpf_link__destroy(ingress_link); test_sock_fields__destroy(skel); - if (child_cg_fd != -1) + if (child_cg_fd >= 0) close(child_cg_fd); - if (parent_cg_fd != -1) + if (parent_cg_fd >= 0) close(parent_cg_fd); } diff --git a/tools/testing/selftests/bpf/prog_tests/sockmap_basic.c b/tools/testing/selftests/bpf/prog_tests/sockmap_basic.c index ab77596b64e3..1352ec104149 100644 --- a/tools/testing/selftests/bpf/prog_tests/sockmap_basic.c +++ b/tools/testing/selftests/bpf/prog_tests/sockmap_basic.c @@ -88,11 +88,11 @@ static void test_sockmap_create_update_free(enum bpf_map_type map_type) int s, map, err; s = connected_socket_v4(); - if (CHECK_FAIL(s == -1)) + if (CHECK_FAIL(s < 0)) return; map = bpf_create_map(map_type, sizeof(int), sizeof(int), 1, 0); - if (CHECK_FAIL(map == -1)) { + if (CHECK_FAIL(map < 0)) { perror("bpf_create_map"); goto out; } @@ -245,7 +245,7 @@ static void test_sockmap_copy(enum bpf_map_type map_type) opts.link_info = &linfo; opts.link_info_len = sizeof(linfo); link = bpf_program__attach_iter(skel->progs.copy, &opts); - if (CHECK(IS_ERR(link), "attach_iter", "attach_iter failed\n")) + if (!ASSERT_OK_PTR(link, "attach_iter")) goto out; iter_fd = bpf_iter_create(bpf_link__fd(link)); @@ -304,7 +304,7 @@ static void test_sockmap_skb_verdict_attach(enum bpf_attach_type first, } err = bpf_prog_attach(verdict, map, second, 0); - assert(err == -1 && errno == EBUSY); + ASSERT_EQ(err, -EBUSY, "prog_attach_fail"); err = bpf_prog_detach2(verdict, map, first); if (CHECK_FAIL(err)) { diff --git a/tools/testing/selftests/bpf/prog_tests/sockmap_ktls.c b/tools/testing/selftests/bpf/prog_tests/sockmap_ktls.c index 06b86addc181..7a0d64fdc192 100644 --- a/tools/testing/selftests/bpf/prog_tests/sockmap_ktls.c +++ b/tools/testing/selftests/bpf/prog_tests/sockmap_ktls.c @@ -98,7 +98,7 @@ static void run_tests(int family, enum bpf_map_type map_type) int map; map = bpf_create_map(map_type, sizeof(int), sizeof(int), 1, 0); - if (CHECK_FAIL(map == -1)) { + if (CHECK_FAIL(map < 0)) { perror("bpf_map_create"); return; } diff --git a/tools/testing/selftests/bpf/prog_tests/sockmap_listen.c b/tools/testing/selftests/bpf/prog_tests/sockmap_listen.c index 648d9ae898d2..0f066b89b4af 100644 --- a/tools/testing/selftests/bpf/prog_tests/sockmap_listen.c +++ b/tools/testing/selftests/bpf/prog_tests/sockmap_listen.c @@ -139,7 +139,7 @@ #define xbpf_map_delete_elem(fd, key) \ ({ \ int __ret = bpf_map_delete_elem((fd), (key)); \ - if (__ret == -1) \ + if (__ret < 0) \ FAIL_ERRNO("map_delete"); \ __ret; \ }) @@ -147,7 +147,7 @@ #define xbpf_map_lookup_elem(fd, key, val) \ ({ \ int __ret = bpf_map_lookup_elem((fd), (key), (val)); \ - if (__ret == -1) \ + if (__ret < 0) \ FAIL_ERRNO("map_lookup"); \ __ret; \ }) @@ -155,7 +155,7 @@ #define xbpf_map_update_elem(fd, key, val, flags) \ ({ \ int __ret = bpf_map_update_elem((fd), (key), (val), (flags)); \ - if (__ret == -1) \ + if (__ret < 0) \ FAIL_ERRNO("map_update"); \ __ret; \ }) @@ -164,7 +164,7 @@ ({ \ int __ret = \ bpf_prog_attach((prog), (target), (type), (flags)); \ - if (__ret == -1) \ + if (__ret < 0) \ FAIL_ERRNO("prog_attach(" #type ")"); \ __ret; \ }) @@ -172,7 +172,7 @@ #define xbpf_prog_detach2(prog, target, type) \ ({ \ int __ret = bpf_prog_detach2((prog), (target), (type)); \ - if (__ret == -1) \ + if (__ret < 0) \ FAIL_ERRNO("prog_detach2(" #type ")"); \ __ret; \ }) diff --git a/tools/testing/selftests/bpf/prog_tests/stacktrace_build_id_nmi.c b/tools/testing/selftests/bpf/prog_tests/stacktrace_build_id_nmi.c index 11a769e18f5d..0a91d8d9954b 100644 --- a/tools/testing/selftests/bpf/prog_tests/stacktrace_build_id_nmi.c +++ b/tools/testing/selftests/bpf/prog_tests/stacktrace_build_id_nmi.c @@ -62,8 +62,7 @@ retry: skel->links.oncpu = bpf_program__attach_perf_event(skel->progs.oncpu, pmu_fd); - if (CHECK(IS_ERR(skel->links.oncpu), "attach_perf_event", - "err %ld\n", PTR_ERR(skel->links.oncpu))) { + if (!ASSERT_OK_PTR(skel->links.oncpu, "attach_perf_event")) { close(pmu_fd); goto cleanup; } diff --git a/tools/testing/selftests/bpf/prog_tests/stacktrace_map.c b/tools/testing/selftests/bpf/prog_tests/stacktrace_map.c index 37269d23df93..04b476bd62b9 100644 --- a/tools/testing/selftests/bpf/prog_tests/stacktrace_map.c +++ b/tools/testing/selftests/bpf/prog_tests/stacktrace_map.c @@ -21,7 +21,7 @@ void test_stacktrace_map(void) goto close_prog; link = bpf_program__attach_tracepoint(prog, "sched", "sched_switch"); - if (CHECK(IS_ERR(link), "attach_tp", "err %ld\n", PTR_ERR(link))) + if (!ASSERT_OK_PTR(link, "attach_tp")) goto close_prog; /* find map fds */ diff --git a/tools/testing/selftests/bpf/prog_tests/stacktrace_map_raw_tp.c b/tools/testing/selftests/bpf/prog_tests/stacktrace_map_raw_tp.c index 404a5498e1a3..4fd30bb651ad 100644 --- a/tools/testing/selftests/bpf/prog_tests/stacktrace_map_raw_tp.c +++ b/tools/testing/selftests/bpf/prog_tests/stacktrace_map_raw_tp.c @@ -21,7 +21,7 @@ void test_stacktrace_map_raw_tp(void) goto close_prog; link = bpf_program__attach_raw_tracepoint(prog, "sched_switch"); - if (CHECK(IS_ERR(link), "attach_raw_tp", "err %ld\n", PTR_ERR(link))) + if (!ASSERT_OK_PTR(link, "attach_raw_tp")) goto close_prog; /* find map fds */ @@ -59,7 +59,6 @@ void test_stacktrace_map_raw_tp(void) goto close_prog; close_prog: - if (!IS_ERR_OR_NULL(link)) - bpf_link__destroy(link); + bpf_link__destroy(link); bpf_object__close(obj); } diff --git a/tools/testing/selftests/bpf/prog_tests/static_linked.c b/tools/testing/selftests/bpf/prog_tests/static_linked.c index 46556976dccc..5c4e3014e063 100644 --- a/tools/testing/selftests/bpf/prog_tests/static_linked.c +++ b/tools/testing/selftests/bpf/prog_tests/static_linked.c @@ -14,12 +14,7 @@ void test_static_linked(void) return; skel->rodata->rovar1 = 1; - skel->bss->static_var1 = 2; - skel->bss->static_var11 = 3; - skel->rodata->rovar2 = 4; - skel->bss->static_var2 = 5; - skel->bss->static_var22 = 6; err = test_static_linked__load(skel); if (!ASSERT_OK(err, "skel_load")) @@ -32,8 +27,8 @@ void test_static_linked(void) /* trigger */ usleep(1); - ASSERT_EQ(skel->bss->var1, 1 * 2 + 2 + 3, "var1"); - ASSERT_EQ(skel->bss->var2, 4 * 3 + 5 + 6, "var2"); + ASSERT_EQ(skel->data->var1, 1 * 2 + 2 + 3, "var1"); + ASSERT_EQ(skel->data->var2, 4 * 3 + 5 + 6, "var2"); cleanup: test_static_linked__destroy(skel); diff --git a/tools/testing/selftests/bpf/prog_tests/syscall.c b/tools/testing/selftests/bpf/prog_tests/syscall.c new file mode 100644 index 000000000000..81e997a69f7a --- /dev/null +++ b/tools/testing/selftests/bpf/prog_tests/syscall.c @@ -0,0 +1,55 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2021 Facebook */ +#include <test_progs.h> +#include "syscall.skel.h" + +struct args { + __u64 log_buf; + __u32 log_size; + int max_entries; + int map_fd; + int prog_fd; + int btf_fd; +}; + +void test_syscall(void) +{ + static char verifier_log[8192]; + struct args ctx = { + .max_entries = 1024, + .log_buf = (uintptr_t) verifier_log, + .log_size = sizeof(verifier_log), + }; + struct bpf_prog_test_run_attr tattr = { + .ctx_in = &ctx, + .ctx_size_in = sizeof(ctx), + }; + struct syscall *skel = NULL; + __u64 key = 12, value = 0; + int err; + + skel = syscall__open_and_load(); + if (!ASSERT_OK_PTR(skel, "skel_load")) + goto cleanup; + + tattr.prog_fd = bpf_program__fd(skel->progs.bpf_prog); + err = bpf_prog_test_run_xattr(&tattr); + ASSERT_EQ(err, 0, "err"); + ASSERT_EQ(tattr.retval, 1, "retval"); + ASSERT_GT(ctx.map_fd, 0, "ctx.map_fd"); + ASSERT_GT(ctx.prog_fd, 0, "ctx.prog_fd"); + ASSERT_OK(memcmp(verifier_log, "processed", sizeof("processed") - 1), + "verifier_log"); + + err = bpf_map_lookup_elem(ctx.map_fd, &key, &value); + ASSERT_EQ(err, 0, "map_lookup"); + ASSERT_EQ(value, 34, "map lookup value"); +cleanup: + syscall__destroy(skel); + if (ctx.prog_fd > 0) + close(ctx.prog_fd); + if (ctx.map_fd > 0) + close(ctx.map_fd); + if (ctx.btf_fd > 0) + close(ctx.btf_fd); +} diff --git a/tools/testing/selftests/bpf/prog_tests/tc_bpf.c b/tools/testing/selftests/bpf/prog_tests/tc_bpf.c new file mode 100644 index 000000000000..4a505a5adf4d --- /dev/null +++ b/tools/testing/selftests/bpf/prog_tests/tc_bpf.c @@ -0,0 +1,395 @@ +// SPDX-License-Identifier: GPL-2.0 + +#include <test_progs.h> +#include <linux/pkt_cls.h> + +#include "test_tc_bpf.skel.h" + +#define LO_IFINDEX 1 + +#define TEST_DECLARE_OPTS(__fd) \ + DECLARE_LIBBPF_OPTS(bpf_tc_opts, opts_h, .handle = 1); \ + DECLARE_LIBBPF_OPTS(bpf_tc_opts, opts_p, .priority = 1); \ + DECLARE_LIBBPF_OPTS(bpf_tc_opts, opts_f, .prog_fd = __fd); \ + DECLARE_LIBBPF_OPTS(bpf_tc_opts, opts_hp, .handle = 1, .priority = 1); \ + DECLARE_LIBBPF_OPTS(bpf_tc_opts, opts_hf, .handle = 1, .prog_fd = __fd); \ + DECLARE_LIBBPF_OPTS(bpf_tc_opts, opts_pf, .priority = 1, .prog_fd = __fd); \ + DECLARE_LIBBPF_OPTS(bpf_tc_opts, opts_hpf, .handle = 1, .priority = 1, .prog_fd = __fd); \ + DECLARE_LIBBPF_OPTS(bpf_tc_opts, opts_hpi, .handle = 1, .priority = 1, .prog_id = 42); \ + DECLARE_LIBBPF_OPTS(bpf_tc_opts, opts_hpr, .handle = 1, .priority = 1, \ + .flags = BPF_TC_F_REPLACE); \ + DECLARE_LIBBPF_OPTS(bpf_tc_opts, opts_hpfi, .handle = 1, .priority = 1, .prog_fd = __fd, \ + .prog_id = 42); \ + DECLARE_LIBBPF_OPTS(bpf_tc_opts, opts_prio_max, .handle = 1, .priority = UINT16_MAX + 1); + +static int test_tc_bpf_basic(const struct bpf_tc_hook *hook, int fd) +{ + DECLARE_LIBBPF_OPTS(bpf_tc_opts, opts, .handle = 1, .priority = 1, .prog_fd = fd); + struct bpf_prog_info info = {}; + __u32 info_len = sizeof(info); + int ret; + + ret = bpf_obj_get_info_by_fd(fd, &info, &info_len); + if (!ASSERT_OK(ret, "bpf_obj_get_info_by_fd")) + return ret; + + ret = bpf_tc_attach(hook, &opts); + if (!ASSERT_OK(ret, "bpf_tc_attach")) + return ret; + + if (!ASSERT_EQ(opts.handle, 1, "handle set") || + !ASSERT_EQ(opts.priority, 1, "priority set") || + !ASSERT_EQ(opts.prog_id, info.id, "prog_id set")) + goto end; + + opts.prog_id = 0; + opts.flags = BPF_TC_F_REPLACE; + ret = bpf_tc_attach(hook, &opts); + if (!ASSERT_OK(ret, "bpf_tc_attach replace mode")) + goto end; + + opts.flags = opts.prog_fd = opts.prog_id = 0; + ret = bpf_tc_query(hook, &opts); + if (!ASSERT_OK(ret, "bpf_tc_query")) + goto end; + + if (!ASSERT_EQ(opts.handle, 1, "handle set") || + !ASSERT_EQ(opts.priority, 1, "priority set") || + !ASSERT_EQ(opts.prog_id, info.id, "prog_id set")) + goto end; + +end: + opts.flags = opts.prog_fd = opts.prog_id = 0; + ret = bpf_tc_detach(hook, &opts); + ASSERT_OK(ret, "bpf_tc_detach"); + return ret; +} + +static int test_tc_bpf_api(struct bpf_tc_hook *hook, int fd) +{ + DECLARE_LIBBPF_OPTS(bpf_tc_opts, attach_opts, .handle = 1, .priority = 1, .prog_fd = fd); + DECLARE_LIBBPF_OPTS(bpf_tc_hook, inv_hook, .attach_point = BPF_TC_INGRESS); + DECLARE_LIBBPF_OPTS(bpf_tc_opts, opts, .handle = 1, .priority = 1); + int ret; + + ret = bpf_tc_hook_create(NULL); + if (!ASSERT_EQ(ret, -EINVAL, "bpf_tc_hook_create invalid hook = NULL")) + return -EINVAL; + + /* hook ifindex = 0 */ + ret = bpf_tc_hook_create(&inv_hook); + if (!ASSERT_EQ(ret, -EINVAL, "bpf_tc_hook_create invalid hook ifindex == 0")) + return -EINVAL; + + ret = bpf_tc_hook_destroy(&inv_hook); + if (!ASSERT_EQ(ret, -EINVAL, "bpf_tc_hook_destroy invalid hook ifindex == 0")) + return -EINVAL; + + ret = bpf_tc_attach(&inv_hook, &attach_opts); + if (!ASSERT_EQ(ret, -EINVAL, "bpf_tc_attach invalid hook ifindex == 0")) + return -EINVAL; + attach_opts.prog_id = 0; + + ret = bpf_tc_detach(&inv_hook, &opts); + if (!ASSERT_EQ(ret, -EINVAL, "bpf_tc_detach invalid hook ifindex == 0")) + return -EINVAL; + + ret = bpf_tc_query(&inv_hook, &opts); + if (!ASSERT_EQ(ret, -EINVAL, "bpf_tc_query invalid hook ifindex == 0")) + return -EINVAL; + + /* hook ifindex < 0 */ + inv_hook.ifindex = -1; + + ret = bpf_tc_hook_create(&inv_hook); + if (!ASSERT_EQ(ret, -EINVAL, "bpf_tc_hook_create invalid hook ifindex < 0")) + return -EINVAL; + + ret = bpf_tc_hook_destroy(&inv_hook); + if (!ASSERT_EQ(ret, -EINVAL, "bpf_tc_hook_destroy invalid hook ifindex < 0")) + return -EINVAL; + + ret = bpf_tc_attach(&inv_hook, &attach_opts); + if (!ASSERT_EQ(ret, -EINVAL, "bpf_tc_attach invalid hook ifindex < 0")) + return -EINVAL; + attach_opts.prog_id = 0; + + ret = bpf_tc_detach(&inv_hook, &opts); + if (!ASSERT_EQ(ret, -EINVAL, "bpf_tc_detach invalid hook ifindex < 0")) + return -EINVAL; + + ret = bpf_tc_query(&inv_hook, &opts); + if (!ASSERT_EQ(ret, -EINVAL, "bpf_tc_query invalid hook ifindex < 0")) + return -EINVAL; + + inv_hook.ifindex = LO_IFINDEX; + + /* hook.attach_point invalid */ + inv_hook.attach_point = 0xabcd; + ret = bpf_tc_hook_create(&inv_hook); + if (!ASSERT_EQ(ret, -EINVAL, "bpf_tc_hook_create invalid hook.attach_point")) + return -EINVAL; + + ret = bpf_tc_hook_destroy(&inv_hook); + if (!ASSERT_EQ(ret, -EINVAL, "bpf_tc_hook_destroy invalid hook.attach_point")) + return -EINVAL; + + ret = bpf_tc_attach(&inv_hook, &attach_opts); + if (!ASSERT_EQ(ret, -EINVAL, "bpf_tc_attach invalid hook.attach_point")) + return -EINVAL; + + ret = bpf_tc_detach(&inv_hook, &opts); + if (!ASSERT_EQ(ret, -EINVAL, "bpf_tc_detach invalid hook.attach_point")) + return -EINVAL; + + ret = bpf_tc_query(&inv_hook, &opts); + if (!ASSERT_EQ(ret, -EINVAL, "bpf_tc_query invalid hook.attach_point")) + return -EINVAL; + + inv_hook.attach_point = BPF_TC_INGRESS; + + /* hook.attach_point valid, but parent invalid */ + inv_hook.parent = TC_H_MAKE(1UL << 16, 10); + ret = bpf_tc_hook_create(&inv_hook); + if (!ASSERT_EQ(ret, -EINVAL, "bpf_tc_hook_create invalid hook parent")) + return -EINVAL; + + ret = bpf_tc_hook_destroy(&inv_hook); + if (!ASSERT_EQ(ret, -EINVAL, "bpf_tc_hook_destroy invalid hook parent")) + return -EINVAL; + + ret = bpf_tc_attach(&inv_hook, &attach_opts); + if (!ASSERT_EQ(ret, -EINVAL, "bpf_tc_attach invalid hook parent")) + return -EINVAL; + + ret = bpf_tc_detach(&inv_hook, &opts); + if (!ASSERT_EQ(ret, -EINVAL, "bpf_tc_detach invalid hook parent")) + return -EINVAL; + + ret = bpf_tc_query(&inv_hook, &opts); + if (!ASSERT_EQ(ret, -EINVAL, "bpf_tc_query invalid hook parent")) + return -EINVAL; + + inv_hook.attach_point = BPF_TC_CUSTOM; + inv_hook.parent = 0; + /* These return EOPNOTSUPP instead of EINVAL as parent is checked after + * attach_point of the hook. + */ + ret = bpf_tc_hook_create(&inv_hook); + if (!ASSERT_EQ(ret, -EOPNOTSUPP, "bpf_tc_hook_create invalid hook parent")) + return -EINVAL; + + ret = bpf_tc_hook_destroy(&inv_hook); + if (!ASSERT_EQ(ret, -EOPNOTSUPP, "bpf_tc_hook_destroy invalid hook parent")) + return -EINVAL; + + ret = bpf_tc_attach(&inv_hook, &attach_opts); + if (!ASSERT_EQ(ret, -EINVAL, "bpf_tc_attach invalid hook parent")) + return -EINVAL; + + ret = bpf_tc_detach(&inv_hook, &opts); + if (!ASSERT_EQ(ret, -EINVAL, "bpf_tc_detach invalid hook parent")) + return -EINVAL; + + ret = bpf_tc_query(&inv_hook, &opts); + if (!ASSERT_EQ(ret, -EINVAL, "bpf_tc_query invalid hook parent")) + return -EINVAL; + + inv_hook.attach_point = BPF_TC_INGRESS; + + /* detach */ + { + TEST_DECLARE_OPTS(fd); + + ret = bpf_tc_detach(NULL, &opts_hp); + if (!ASSERT_EQ(ret, -EINVAL, "bpf_tc_detach invalid hook = NULL")) + return -EINVAL; + + ret = bpf_tc_detach(hook, NULL); + if (!ASSERT_EQ(ret, -EINVAL, "bpf_tc_detach invalid opts = NULL")) + return -EINVAL; + + ret = bpf_tc_detach(hook, &opts_hpr); + if (!ASSERT_EQ(ret, -EINVAL, "bpf_tc_detach invalid flags set")) + return -EINVAL; + + ret = bpf_tc_detach(hook, &opts_hpf); + if (!ASSERT_EQ(ret, -EINVAL, "bpf_tc_detach invalid prog_fd set")) + return -EINVAL; + + ret = bpf_tc_detach(hook, &opts_hpi); + if (!ASSERT_EQ(ret, -EINVAL, "bpf_tc_detach invalid prog_id set")) + return -EINVAL; + + ret = bpf_tc_detach(hook, &opts_p); + if (!ASSERT_EQ(ret, -EINVAL, "bpf_tc_detach invalid handle unset")) + return -EINVAL; + + ret = bpf_tc_detach(hook, &opts_h); + if (!ASSERT_EQ(ret, -EINVAL, "bpf_tc_detach invalid priority unset")) + return -EINVAL; + + ret = bpf_tc_detach(hook, &opts_prio_max); + if (!ASSERT_EQ(ret, -EINVAL, "bpf_tc_detach invalid priority > UINT16_MAX")) + return -EINVAL; + } + + /* query */ + { + TEST_DECLARE_OPTS(fd); + + ret = bpf_tc_query(NULL, &opts); + if (!ASSERT_EQ(ret, -EINVAL, "bpf_tc_query invalid hook = NULL")) + return -EINVAL; + + ret = bpf_tc_query(hook, NULL); + if (!ASSERT_EQ(ret, -EINVAL, "bpf_tc_query invalid opts = NULL")) + return -EINVAL; + + ret = bpf_tc_query(hook, &opts_hpr); + if (!ASSERT_EQ(ret, -EINVAL, "bpf_tc_query invalid flags set")) + return -EINVAL; + + ret = bpf_tc_query(hook, &opts_hpf); + if (!ASSERT_EQ(ret, -EINVAL, "bpf_tc_query invalid prog_fd set")) + return -EINVAL; + + ret = bpf_tc_query(hook, &opts_hpi); + if (!ASSERT_EQ(ret, -EINVAL, "bpf_tc_query invalid prog_id set")) + return -EINVAL; + + ret = bpf_tc_query(hook, &opts_p); + if (!ASSERT_EQ(ret, -EINVAL, "bpf_tc_query invalid handle unset")) + return -EINVAL; + + ret = bpf_tc_query(hook, &opts_h); + if (!ASSERT_EQ(ret, -EINVAL, "bpf_tc_query invalid priority unset")) + return -EINVAL; + + ret = bpf_tc_query(hook, &opts_prio_max); + if (!ASSERT_EQ(ret, -EINVAL, "bpf_tc_query invalid priority > UINT16_MAX")) + return -EINVAL; + + /* when chain is not present, kernel returns -EINVAL */ + ret = bpf_tc_query(hook, &opts_hp); + if (!ASSERT_EQ(ret, -EINVAL, "bpf_tc_query valid handle, priority set")) + return -EINVAL; + } + + /* attach */ + { + TEST_DECLARE_OPTS(fd); + + ret = bpf_tc_attach(NULL, &opts_hp); + if (!ASSERT_EQ(ret, -EINVAL, "bpf_tc_attach invalid hook = NULL")) + return -EINVAL; + + ret = bpf_tc_attach(hook, NULL); + if (!ASSERT_EQ(ret, -EINVAL, "bpf_tc_attach invalid opts = NULL")) + return -EINVAL; + + opts_hp.flags = 42; + ret = bpf_tc_attach(hook, &opts_hp); + if (!ASSERT_EQ(ret, -EINVAL, "bpf_tc_attach invalid flags")) + return -EINVAL; + + ret = bpf_tc_attach(hook, NULL); + if (!ASSERT_EQ(ret, -EINVAL, "bpf_tc_attach invalid prog_fd unset")) + return -EINVAL; + + ret = bpf_tc_attach(hook, &opts_hpi); + if (!ASSERT_EQ(ret, -EINVAL, "bpf_tc_attach invalid prog_id set")) + return -EINVAL; + + ret = bpf_tc_attach(hook, &opts_pf); + if (!ASSERT_OK(ret, "bpf_tc_attach valid handle unset")) + return -EINVAL; + opts_pf.prog_fd = opts_pf.prog_id = 0; + ASSERT_OK(bpf_tc_detach(hook, &opts_pf), "bpf_tc_detach"); + + ret = bpf_tc_attach(hook, &opts_hf); + if (!ASSERT_OK(ret, "bpf_tc_attach valid priority unset")) + return -EINVAL; + opts_hf.prog_fd = opts_hf.prog_id = 0; + ASSERT_OK(bpf_tc_detach(hook, &opts_hf), "bpf_tc_detach"); + + ret = bpf_tc_attach(hook, &opts_prio_max); + if (!ASSERT_EQ(ret, -EINVAL, "bpf_tc_attach invalid priority > UINT16_MAX")) + return -EINVAL; + + ret = bpf_tc_attach(hook, &opts_f); + if (!ASSERT_OK(ret, "bpf_tc_attach valid both handle and priority unset")) + return -EINVAL; + opts_f.prog_fd = opts_f.prog_id = 0; + ASSERT_OK(bpf_tc_detach(hook, &opts_f), "bpf_tc_detach"); + } + + return 0; +} + +void test_tc_bpf(void) +{ + DECLARE_LIBBPF_OPTS(bpf_tc_hook, hook, .ifindex = LO_IFINDEX, + .attach_point = BPF_TC_INGRESS); + struct test_tc_bpf *skel = NULL; + bool hook_created = false; + int cls_fd, ret; + + skel = test_tc_bpf__open_and_load(); + if (!ASSERT_OK_PTR(skel, "test_tc_bpf__open_and_load")) + return; + + cls_fd = bpf_program__fd(skel->progs.cls); + + ret = bpf_tc_hook_create(&hook); + if (ret == 0) + hook_created = true; + + ret = ret == -EEXIST ? 0 : ret; + if (!ASSERT_OK(ret, "bpf_tc_hook_create(BPF_TC_INGRESS)")) + goto end; + + hook.attach_point = BPF_TC_CUSTOM; + hook.parent = TC_H_MAKE(TC_H_CLSACT, TC_H_MIN_INGRESS); + ret = bpf_tc_hook_create(&hook); + if (!ASSERT_EQ(ret, -EOPNOTSUPP, "bpf_tc_hook_create invalid hook.attach_point")) + goto end; + + ret = test_tc_bpf_basic(&hook, cls_fd); + if (!ASSERT_OK(ret, "test_tc_internal ingress")) + goto end; + + ret = bpf_tc_hook_destroy(&hook); + if (!ASSERT_EQ(ret, -EOPNOTSUPP, "bpf_tc_hook_destroy invalid hook.attach_point")) + goto end; + + hook.attach_point = BPF_TC_INGRESS; + hook.parent = 0; + bpf_tc_hook_destroy(&hook); + + ret = test_tc_bpf_basic(&hook, cls_fd); + if (!ASSERT_OK(ret, "test_tc_internal ingress")) + goto end; + + bpf_tc_hook_destroy(&hook); + + hook.attach_point = BPF_TC_EGRESS; + ret = test_tc_bpf_basic(&hook, cls_fd); + if (!ASSERT_OK(ret, "test_tc_internal egress")) + goto end; + + bpf_tc_hook_destroy(&hook); + + ret = test_tc_bpf_api(&hook, cls_fd); + if (!ASSERT_OK(ret, "test_tc_bpf_api")) + goto end; + + bpf_tc_hook_destroy(&hook); + +end: + if (hook_created) { + hook.attach_point = BPF_TC_INGRESS | BPF_TC_EGRESS; + bpf_tc_hook_destroy(&hook); + } + test_tc_bpf__destroy(skel); +} diff --git a/tools/testing/selftests/bpf/prog_tests/tc_redirect.c b/tools/testing/selftests/bpf/prog_tests/tc_redirect.c new file mode 100644 index 000000000000..5703c918812b --- /dev/null +++ b/tools/testing/selftests/bpf/prog_tests/tc_redirect.c @@ -0,0 +1,785 @@ +// SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause + +/* + * This test sets up 3 netns (src <-> fwd <-> dst). There is no direct veth link + * between src and dst. The netns fwd has veth links to each src and dst. The + * client is in src and server in dst. The test installs a TC BPF program to each + * host facing veth in fwd which calls into i) bpf_redirect_neigh() to perform the + * neigh addr population and redirect or ii) bpf_redirect_peer() for namespace + * switch from ingress side; it also installs a checker prog on the egress side + * to drop unexpected traffic. + */ + +#define _GNU_SOURCE + +#include <arpa/inet.h> +#include <linux/limits.h> +#include <linux/sysctl.h> +#include <linux/if_tun.h> +#include <linux/if.h> +#include <sched.h> +#include <stdbool.h> +#include <stdio.h> +#include <sys/stat.h> +#include <sys/mount.h> + +#include "test_progs.h" +#include "network_helpers.h" +#include "test_tc_neigh_fib.skel.h" +#include "test_tc_neigh.skel.h" +#include "test_tc_peer.skel.h" + +#define NS_SRC "ns_src" +#define NS_FWD "ns_fwd" +#define NS_DST "ns_dst" + +#define IP4_SRC "172.16.1.100" +#define IP4_DST "172.16.2.100" +#define IP4_TUN_SRC "172.17.1.100" +#define IP4_TUN_FWD "172.17.1.200" +#define IP4_PORT 9004 + +#define IP6_SRC "0::1:dead:beef:cafe" +#define IP6_DST "0::2:dead:beef:cafe" +#define IP6_TUN_SRC "1::1:dead:beef:cafe" +#define IP6_TUN_FWD "1::2:dead:beef:cafe" +#define IP6_PORT 9006 + +#define IP4_SLL "169.254.0.1" +#define IP4_DLL "169.254.0.2" +#define IP4_NET "169.254.0.0" + +#define MAC_DST_FWD "00:11:22:33:44:55" +#define MAC_DST "00:22:33:44:55:66" + +#define IFADDR_STR_LEN 18 +#define PING_ARGS "-i 0.2 -c 3 -w 10 -q" + +#define SRC_PROG_PIN_FILE "/sys/fs/bpf/test_tc_src" +#define DST_PROG_PIN_FILE "/sys/fs/bpf/test_tc_dst" +#define CHK_PROG_PIN_FILE "/sys/fs/bpf/test_tc_chk" + +#define TIMEOUT_MILLIS 10000 + +#define log_err(MSG, ...) \ + fprintf(stderr, "(%s:%d: errno: %s) " MSG "\n", \ + __FILE__, __LINE__, strerror(errno), ##__VA_ARGS__) + +static const char * const namespaces[] = {NS_SRC, NS_FWD, NS_DST, NULL}; + +static int write_file(const char *path, const char *newval) +{ + FILE *f; + + f = fopen(path, "r+"); + if (!f) + return -1; + if (fwrite(newval, strlen(newval), 1, f) != 1) { + log_err("writing to %s failed", path); + fclose(f); + return -1; + } + fclose(f); + return 0; +} + +struct nstoken { + int orig_netns_fd; +}; + +static int setns_by_fd(int nsfd) +{ + int err; + + err = setns(nsfd, CLONE_NEWNET); + close(nsfd); + + if (!ASSERT_OK(err, "setns")) + return err; + + /* Switch /sys to the new namespace so that e.g. /sys/class/net + * reflects the devices in the new namespace. + */ + err = unshare(CLONE_NEWNS); + if (!ASSERT_OK(err, "unshare")) + return err; + + err = umount2("/sys", MNT_DETACH); + if (!ASSERT_OK(err, "umount2 /sys")) + return err; + + err = mount("sysfs", "/sys", "sysfs", 0, NULL); + if (!ASSERT_OK(err, "mount /sys")) + return err; + + err = mount("bpffs", "/sys/fs/bpf", "bpf", 0, NULL); + if (!ASSERT_OK(err, "mount /sys/fs/bpf")) + return err; + + return 0; +} + +/** + * open_netns() - Switch to specified network namespace by name. + * + * Returns token with which to restore the original namespace + * using close_netns(). + */ +static struct nstoken *open_netns(const char *name) +{ + int nsfd; + char nspath[PATH_MAX]; + int err; + struct nstoken *token; + + token = malloc(sizeof(struct nstoken)); + if (!ASSERT_OK_PTR(token, "malloc token")) + return NULL; + + token->orig_netns_fd = open("/proc/self/ns/net", O_RDONLY); + if (!ASSERT_GE(token->orig_netns_fd, 0, "open /proc/self/ns/net")) + goto fail; + + snprintf(nspath, sizeof(nspath), "%s/%s", "/var/run/netns", name); + nsfd = open(nspath, O_RDONLY | O_CLOEXEC); + if (!ASSERT_GE(nsfd, 0, "open netns fd")) + goto fail; + + err = setns_by_fd(nsfd); + if (!ASSERT_OK(err, "setns_by_fd")) + goto fail; + + return token; +fail: + free(token); + return NULL; +} + +static void close_netns(struct nstoken *token) +{ + ASSERT_OK(setns_by_fd(token->orig_netns_fd), "setns_by_fd"); + free(token); +} + +static int netns_setup_namespaces(const char *verb) +{ + const char * const *ns = namespaces; + char cmd[128]; + + while (*ns) { + snprintf(cmd, sizeof(cmd), "ip netns %s %s", verb, *ns); + if (!ASSERT_OK(system(cmd), cmd)) + return -1; + ns++; + } + return 0; +} + +struct netns_setup_result { + int ifindex_veth_src_fwd; + int ifindex_veth_dst_fwd; +}; + +static int get_ifaddr(const char *name, char *ifaddr) +{ + char path[PATH_MAX]; + FILE *f; + int ret; + + snprintf(path, PATH_MAX, "/sys/class/net/%s/address", name); + f = fopen(path, "r"); + if (!ASSERT_OK_PTR(f, path)) + return -1; + + ret = fread(ifaddr, 1, IFADDR_STR_LEN, f); + if (!ASSERT_EQ(ret, IFADDR_STR_LEN, "fread ifaddr")) { + fclose(f); + return -1; + } + fclose(f); + return 0; +} + +static int get_ifindex(const char *name) +{ + char path[PATH_MAX]; + char buf[32]; + FILE *f; + int ret; + + snprintf(path, PATH_MAX, "/sys/class/net/%s/ifindex", name); + f = fopen(path, "r"); + if (!ASSERT_OK_PTR(f, path)) + return -1; + + ret = fread(buf, 1, sizeof(buf), f); + if (!ASSERT_GT(ret, 0, "fread ifindex")) { + fclose(f); + return -1; + } + fclose(f); + return atoi(buf); +} + +#define SYS(fmt, ...) \ + ({ \ + char cmd[1024]; \ + snprintf(cmd, sizeof(cmd), fmt, ##__VA_ARGS__); \ + if (!ASSERT_OK(system(cmd), cmd)) \ + goto fail; \ + }) + +static int netns_setup_links_and_routes(struct netns_setup_result *result) +{ + struct nstoken *nstoken = NULL; + char veth_src_fwd_addr[IFADDR_STR_LEN+1] = {}; + + SYS("ip link add veth_src type veth peer name veth_src_fwd"); + SYS("ip link add veth_dst type veth peer name veth_dst_fwd"); + + SYS("ip link set veth_dst_fwd address " MAC_DST_FWD); + SYS("ip link set veth_dst address " MAC_DST); + + if (get_ifaddr("veth_src_fwd", veth_src_fwd_addr)) + goto fail; + + result->ifindex_veth_src_fwd = get_ifindex("veth_src_fwd"); + if (result->ifindex_veth_src_fwd < 0) + goto fail; + result->ifindex_veth_dst_fwd = get_ifindex("veth_dst_fwd"); + if (result->ifindex_veth_dst_fwd < 0) + goto fail; + + SYS("ip link set veth_src netns " NS_SRC); + SYS("ip link set veth_src_fwd netns " NS_FWD); + SYS("ip link set veth_dst_fwd netns " NS_FWD); + SYS("ip link set veth_dst netns " NS_DST); + + /** setup in 'src' namespace */ + nstoken = open_netns(NS_SRC); + if (!ASSERT_OK_PTR(nstoken, "setns src")) + goto fail; + + SYS("ip addr add " IP4_SRC "/32 dev veth_src"); + SYS("ip addr add " IP6_SRC "/128 dev veth_src nodad"); + SYS("ip link set dev veth_src up"); + + SYS("ip route add " IP4_DST "/32 dev veth_src scope global"); + SYS("ip route add " IP4_NET "/16 dev veth_src scope global"); + SYS("ip route add " IP6_DST "/128 dev veth_src scope global"); + + SYS("ip neigh add " IP4_DST " dev veth_src lladdr %s", + veth_src_fwd_addr); + SYS("ip neigh add " IP6_DST " dev veth_src lladdr %s", + veth_src_fwd_addr); + + close_netns(nstoken); + + /** setup in 'fwd' namespace */ + nstoken = open_netns(NS_FWD); + if (!ASSERT_OK_PTR(nstoken, "setns fwd")) + goto fail; + + /* The fwd netns automatically gets a v6 LL address / routes, but also + * needs v4 one in order to start ARP probing. IP4_NET route is added + * to the endpoints so that the ARP processing will reply. + */ + SYS("ip addr add " IP4_SLL "/32 dev veth_src_fwd"); + SYS("ip addr add " IP4_DLL "/32 dev veth_dst_fwd"); + SYS("ip link set dev veth_src_fwd up"); + SYS("ip link set dev veth_dst_fwd up"); + + SYS("ip route add " IP4_SRC "/32 dev veth_src_fwd scope global"); + SYS("ip route add " IP6_SRC "/128 dev veth_src_fwd scope global"); + SYS("ip route add " IP4_DST "/32 dev veth_dst_fwd scope global"); + SYS("ip route add " IP6_DST "/128 dev veth_dst_fwd scope global"); + + close_netns(nstoken); + + /** setup in 'dst' namespace */ + nstoken = open_netns(NS_DST); + if (!ASSERT_OK_PTR(nstoken, "setns dst")) + goto fail; + + SYS("ip addr add " IP4_DST "/32 dev veth_dst"); + SYS("ip addr add " IP6_DST "/128 dev veth_dst nodad"); + SYS("ip link set dev veth_dst up"); + + SYS("ip route add " IP4_SRC "/32 dev veth_dst scope global"); + SYS("ip route add " IP4_NET "/16 dev veth_dst scope global"); + SYS("ip route add " IP6_SRC "/128 dev veth_dst scope global"); + + SYS("ip neigh add " IP4_SRC " dev veth_dst lladdr " MAC_DST_FWD); + SYS("ip neigh add " IP6_SRC " dev veth_dst lladdr " MAC_DST_FWD); + + close_netns(nstoken); + + return 0; +fail: + if (nstoken) + close_netns(nstoken); + return -1; +} + +static int netns_load_bpf(void) +{ + SYS("tc qdisc add dev veth_src_fwd clsact"); + SYS("tc filter add dev veth_src_fwd ingress bpf da object-pinned " + SRC_PROG_PIN_FILE); + SYS("tc filter add dev veth_src_fwd egress bpf da object-pinned " + CHK_PROG_PIN_FILE); + + SYS("tc qdisc add dev veth_dst_fwd clsact"); + SYS("tc filter add dev veth_dst_fwd ingress bpf da object-pinned " + DST_PROG_PIN_FILE); + SYS("tc filter add dev veth_dst_fwd egress bpf da object-pinned " + CHK_PROG_PIN_FILE); + + return 0; +fail: + return -1; +} + +static void test_tcp(int family, const char *addr, __u16 port) +{ + int listen_fd = -1, accept_fd = -1, client_fd = -1; + char buf[] = "testing testing"; + int n; + struct nstoken *nstoken; + + nstoken = open_netns(NS_DST); + if (!ASSERT_OK_PTR(nstoken, "setns dst")) + return; + + listen_fd = start_server(family, SOCK_STREAM, addr, port, 0); + if (!ASSERT_GE(listen_fd, 0, "listen")) + goto done; + + close_netns(nstoken); + nstoken = open_netns(NS_SRC); + if (!ASSERT_OK_PTR(nstoken, "setns src")) + goto done; + + client_fd = connect_to_fd(listen_fd, TIMEOUT_MILLIS); + if (!ASSERT_GE(client_fd, 0, "connect_to_fd")) + goto done; + + accept_fd = accept(listen_fd, NULL, NULL); + if (!ASSERT_GE(accept_fd, 0, "accept")) + goto done; + + if (!ASSERT_OK(settimeo(accept_fd, TIMEOUT_MILLIS), "settimeo")) + goto done; + + n = write(client_fd, buf, sizeof(buf)); + if (!ASSERT_EQ(n, sizeof(buf), "send to server")) + goto done; + + n = read(accept_fd, buf, sizeof(buf)); + ASSERT_EQ(n, sizeof(buf), "recv from server"); + +done: + if (nstoken) + close_netns(nstoken); + if (listen_fd >= 0) + close(listen_fd); + if (accept_fd >= 0) + close(accept_fd); + if (client_fd >= 0) + close(client_fd); +} + +static int test_ping(int family, const char *addr) +{ + const char *ping = family == AF_INET6 ? "ping6" : "ping"; + + SYS("ip netns exec " NS_SRC " %s " PING_ARGS " %s > /dev/null", ping, addr); + return 0; +fail: + return -1; +} + +static void test_connectivity(void) +{ + test_tcp(AF_INET, IP4_DST, IP4_PORT); + test_ping(AF_INET, IP4_DST); + test_tcp(AF_INET6, IP6_DST, IP6_PORT); + test_ping(AF_INET6, IP6_DST); +} + +static int set_forwarding(bool enable) +{ + int err; + + err = write_file("/proc/sys/net/ipv4/ip_forward", enable ? "1" : "0"); + if (!ASSERT_OK(err, "set ipv4.ip_forward=0")) + return err; + + err = write_file("/proc/sys/net/ipv6/conf/all/forwarding", enable ? "1" : "0"); + if (!ASSERT_OK(err, "set ipv6.forwarding=0")) + return err; + + return 0; +} + +static void test_tc_redirect_neigh_fib(struct netns_setup_result *setup_result) +{ + struct nstoken *nstoken = NULL; + struct test_tc_neigh_fib *skel = NULL; + int err; + + nstoken = open_netns(NS_FWD); + if (!ASSERT_OK_PTR(nstoken, "setns fwd")) + return; + + skel = test_tc_neigh_fib__open(); + if (!ASSERT_OK_PTR(skel, "test_tc_neigh_fib__open")) + goto done; + + if (!ASSERT_OK(test_tc_neigh_fib__load(skel), "test_tc_neigh_fib__load")) + goto done; + + err = bpf_program__pin(skel->progs.tc_src, SRC_PROG_PIN_FILE); + if (!ASSERT_OK(err, "pin " SRC_PROG_PIN_FILE)) + goto done; + + err = bpf_program__pin(skel->progs.tc_chk, CHK_PROG_PIN_FILE); + if (!ASSERT_OK(err, "pin " CHK_PROG_PIN_FILE)) + goto done; + + err = bpf_program__pin(skel->progs.tc_dst, DST_PROG_PIN_FILE); + if (!ASSERT_OK(err, "pin " DST_PROG_PIN_FILE)) + goto done; + + if (netns_load_bpf()) + goto done; + + /* bpf_fib_lookup() checks if forwarding is enabled */ + if (!ASSERT_OK(set_forwarding(true), "enable forwarding")) + goto done; + + test_connectivity(); + +done: + if (skel) + test_tc_neigh_fib__destroy(skel); + close_netns(nstoken); +} + +static void test_tc_redirect_neigh(struct netns_setup_result *setup_result) +{ + struct nstoken *nstoken = NULL; + struct test_tc_neigh *skel = NULL; + int err; + + nstoken = open_netns(NS_FWD); + if (!ASSERT_OK_PTR(nstoken, "setns fwd")) + return; + + skel = test_tc_neigh__open(); + if (!ASSERT_OK_PTR(skel, "test_tc_neigh__open")) + goto done; + + skel->rodata->IFINDEX_SRC = setup_result->ifindex_veth_src_fwd; + skel->rodata->IFINDEX_DST = setup_result->ifindex_veth_dst_fwd; + + err = test_tc_neigh__load(skel); + if (!ASSERT_OK(err, "test_tc_neigh__load")) + goto done; + + err = bpf_program__pin(skel->progs.tc_src, SRC_PROG_PIN_FILE); + if (!ASSERT_OK(err, "pin " SRC_PROG_PIN_FILE)) + goto done; + + err = bpf_program__pin(skel->progs.tc_chk, CHK_PROG_PIN_FILE); + if (!ASSERT_OK(err, "pin " CHK_PROG_PIN_FILE)) + goto done; + + err = bpf_program__pin(skel->progs.tc_dst, DST_PROG_PIN_FILE); + if (!ASSERT_OK(err, "pin " DST_PROG_PIN_FILE)) + goto done; + + if (netns_load_bpf()) + goto done; + + if (!ASSERT_OK(set_forwarding(false), "disable forwarding")) + goto done; + + test_connectivity(); + +done: + if (skel) + test_tc_neigh__destroy(skel); + close_netns(nstoken); +} + +static void test_tc_redirect_peer(struct netns_setup_result *setup_result) +{ + struct nstoken *nstoken; + struct test_tc_peer *skel; + int err; + + nstoken = open_netns(NS_FWD); + if (!ASSERT_OK_PTR(nstoken, "setns fwd")) + return; + + skel = test_tc_peer__open(); + if (!ASSERT_OK_PTR(skel, "test_tc_peer__open")) + goto done; + + skel->rodata->IFINDEX_SRC = setup_result->ifindex_veth_src_fwd; + skel->rodata->IFINDEX_DST = setup_result->ifindex_veth_dst_fwd; + + err = test_tc_peer__load(skel); + if (!ASSERT_OK(err, "test_tc_peer__load")) + goto done; + + err = bpf_program__pin(skel->progs.tc_src, SRC_PROG_PIN_FILE); + if (!ASSERT_OK(err, "pin " SRC_PROG_PIN_FILE)) + goto done; + + err = bpf_program__pin(skel->progs.tc_chk, CHK_PROG_PIN_FILE); + if (!ASSERT_OK(err, "pin " CHK_PROG_PIN_FILE)) + goto done; + + err = bpf_program__pin(skel->progs.tc_dst, DST_PROG_PIN_FILE); + if (!ASSERT_OK(err, "pin " DST_PROG_PIN_FILE)) + goto done; + + if (netns_load_bpf()) + goto done; + + if (!ASSERT_OK(set_forwarding(false), "disable forwarding")) + goto done; + + test_connectivity(); + +done: + if (skel) + test_tc_peer__destroy(skel); + close_netns(nstoken); +} + +static int tun_open(char *name) +{ + struct ifreq ifr; + int fd, err; + + fd = open("/dev/net/tun", O_RDWR); + if (!ASSERT_GE(fd, 0, "open /dev/net/tun")) + return -1; + + memset(&ifr, 0, sizeof(ifr)); + + ifr.ifr_flags = IFF_TUN | IFF_NO_PI; + if (*name) + strncpy(ifr.ifr_name, name, IFNAMSIZ); + + err = ioctl(fd, TUNSETIFF, &ifr); + if (!ASSERT_OK(err, "ioctl TUNSETIFF")) + goto fail; + + SYS("ip link set dev %s up", name); + + return fd; +fail: + close(fd); + return -1; +} + +#define MAX(a, b) ((a) > (b) ? (a) : (b)) +enum { + SRC_TO_TARGET = 0, + TARGET_TO_SRC = 1, +}; + +static int tun_relay_loop(int src_fd, int target_fd) +{ + fd_set rfds, wfds; + + FD_ZERO(&rfds); + FD_ZERO(&wfds); + + for (;;) { + char buf[1500]; + int direction, nread, nwrite; + + FD_SET(src_fd, &rfds); + FD_SET(target_fd, &rfds); + + if (select(1 + MAX(src_fd, target_fd), &rfds, NULL, NULL, NULL) < 0) { + log_err("select failed"); + return 1; + } + + direction = FD_ISSET(src_fd, &rfds) ? SRC_TO_TARGET : TARGET_TO_SRC; + + nread = read(direction == SRC_TO_TARGET ? src_fd : target_fd, buf, sizeof(buf)); + if (nread < 0) { + log_err("read failed"); + return 1; + } + + nwrite = write(direction == SRC_TO_TARGET ? target_fd : src_fd, buf, nread); + if (nwrite != nread) { + log_err("write failed"); + return 1; + } + } +} + +static void test_tc_redirect_peer_l3(struct netns_setup_result *setup_result) +{ + struct test_tc_peer *skel = NULL; + struct nstoken *nstoken = NULL; + int err; + int tunnel_pid = -1; + int src_fd, target_fd; + int ifindex; + + /* Start a L3 TUN/TAP tunnel between the src and dst namespaces. + * This test is using TUN/TAP instead of e.g. IPIP or GRE tunnel as those + * expose the L2 headers encapsulating the IP packet to BPF and hence + * don't have skb in suitable state for this test. Alternative to TUN/TAP + * would be e.g. Wireguard which would appear as a pure L3 device to BPF, + * but that requires much more complicated setup. + */ + nstoken = open_netns(NS_SRC); + if (!ASSERT_OK_PTR(nstoken, "setns " NS_SRC)) + return; + + src_fd = tun_open("tun_src"); + if (!ASSERT_GE(src_fd, 0, "tun_open tun_src")) + goto fail; + + close_netns(nstoken); + + nstoken = open_netns(NS_FWD); + if (!ASSERT_OK_PTR(nstoken, "setns " NS_FWD)) + goto fail; + + target_fd = tun_open("tun_fwd"); + if (!ASSERT_GE(target_fd, 0, "tun_open tun_fwd")) + goto fail; + + tunnel_pid = fork(); + if (!ASSERT_GE(tunnel_pid, 0, "fork tun_relay_loop")) + goto fail; + + if (tunnel_pid == 0) + exit(tun_relay_loop(src_fd, target_fd)); + + skel = test_tc_peer__open(); + if (!ASSERT_OK_PTR(skel, "test_tc_peer__open")) + goto fail; + + ifindex = get_ifindex("tun_fwd"); + if (!ASSERT_GE(ifindex, 0, "get_ifindex tun_fwd")) + goto fail; + + skel->rodata->IFINDEX_SRC = ifindex; + skel->rodata->IFINDEX_DST = setup_result->ifindex_veth_dst_fwd; + + err = test_tc_peer__load(skel); + if (!ASSERT_OK(err, "test_tc_peer__load")) + goto fail; + + err = bpf_program__pin(skel->progs.tc_src_l3, SRC_PROG_PIN_FILE); + if (!ASSERT_OK(err, "pin " SRC_PROG_PIN_FILE)) + goto fail; + + err = bpf_program__pin(skel->progs.tc_dst_l3, DST_PROG_PIN_FILE); + if (!ASSERT_OK(err, "pin " DST_PROG_PIN_FILE)) + goto fail; + + err = bpf_program__pin(skel->progs.tc_chk, CHK_PROG_PIN_FILE); + if (!ASSERT_OK(err, "pin " CHK_PROG_PIN_FILE)) + goto fail; + + /* Load "tc_src_l3" to the tun_fwd interface to redirect packets + * towards dst, and "tc_dst" to redirect packets + * and "tc_chk" on veth_dst_fwd to drop non-redirected packets. + */ + SYS("tc qdisc add dev tun_fwd clsact"); + SYS("tc filter add dev tun_fwd ingress bpf da object-pinned " + SRC_PROG_PIN_FILE); + + SYS("tc qdisc add dev veth_dst_fwd clsact"); + SYS("tc filter add dev veth_dst_fwd ingress bpf da object-pinned " + DST_PROG_PIN_FILE); + SYS("tc filter add dev veth_dst_fwd egress bpf da object-pinned " + CHK_PROG_PIN_FILE); + + /* Setup route and neigh tables */ + SYS("ip -netns " NS_SRC " addr add dev tun_src " IP4_TUN_SRC "/24"); + SYS("ip -netns " NS_FWD " addr add dev tun_fwd " IP4_TUN_FWD "/24"); + + SYS("ip -netns " NS_SRC " addr add dev tun_src " IP6_TUN_SRC "/64 nodad"); + SYS("ip -netns " NS_FWD " addr add dev tun_fwd " IP6_TUN_FWD "/64 nodad"); + + SYS("ip -netns " NS_SRC " route del " IP4_DST "/32 dev veth_src scope global"); + SYS("ip -netns " NS_SRC " route add " IP4_DST "/32 via " IP4_TUN_FWD + " dev tun_src scope global"); + SYS("ip -netns " NS_DST " route add " IP4_TUN_SRC "/32 dev veth_dst scope global"); + SYS("ip -netns " NS_SRC " route del " IP6_DST "/128 dev veth_src scope global"); + SYS("ip -netns " NS_SRC " route add " IP6_DST "/128 via " IP6_TUN_FWD + " dev tun_src scope global"); + SYS("ip -netns " NS_DST " route add " IP6_TUN_SRC "/128 dev veth_dst scope global"); + + SYS("ip -netns " NS_DST " neigh add " IP4_TUN_SRC " dev veth_dst lladdr " MAC_DST_FWD); + SYS("ip -netns " NS_DST " neigh add " IP6_TUN_SRC " dev veth_dst lladdr " MAC_DST_FWD); + + if (!ASSERT_OK(set_forwarding(false), "disable forwarding")) + goto fail; + + test_connectivity(); + +fail: + if (tunnel_pid > 0) { + kill(tunnel_pid, SIGTERM); + waitpid(tunnel_pid, NULL, 0); + } + if (src_fd >= 0) + close(src_fd); + if (target_fd >= 0) + close(target_fd); + if (skel) + test_tc_peer__destroy(skel); + if (nstoken) + close_netns(nstoken); +} + +#define RUN_TEST(name) \ + ({ \ + struct netns_setup_result setup_result; \ + if (test__start_subtest(#name)) \ + if (ASSERT_OK(netns_setup_namespaces("add"), "setup namespaces")) { \ + if (ASSERT_OK(netns_setup_links_and_routes(&setup_result), \ + "setup links and routes")) \ + test_ ## name(&setup_result); \ + netns_setup_namespaces("delete"); \ + } \ + }) + +static void *test_tc_redirect_run_tests(void *arg) +{ + RUN_TEST(tc_redirect_peer); + RUN_TEST(tc_redirect_peer_l3); + RUN_TEST(tc_redirect_neigh); + RUN_TEST(tc_redirect_neigh_fib); + return NULL; +} + +void test_tc_redirect(void) +{ + pthread_t test_thread; + int err; + + /* Run the tests in their own thread to isolate the namespace changes + * so they do not affect the environment of other tests. + * (specifically needed because of unshare(CLONE_NEWNS) in open_netns()) + */ + err = pthread_create(&test_thread, NULL, &test_tc_redirect_run_tests, NULL); + if (ASSERT_OK(err, "pthread_create")) + ASSERT_OK(pthread_join(test_thread, NULL), "pthread_join"); +} diff --git a/tools/testing/selftests/bpf/prog_tests/tcp_hdr_options.c b/tools/testing/selftests/bpf/prog_tests/tcp_hdr_options.c index 08d19cafd5e8..1fa772079967 100644 --- a/tools/testing/selftests/bpf/prog_tests/tcp_hdr_options.c +++ b/tools/testing/selftests/bpf/prog_tests/tcp_hdr_options.c @@ -353,8 +353,7 @@ static void fastopen_estab(void) return; link = bpf_program__attach_cgroup(skel->progs.estab, cg_fd); - if (CHECK(IS_ERR(link), "attach_cgroup(estab)", "err: %ld\n", - PTR_ERR(link))) + if (!ASSERT_OK_PTR(link, "attach_cgroup(estab)")) return; if (sk_fds_connect(&sk_fds, true)) { @@ -398,8 +397,7 @@ static void syncookie_estab(void) return; link = bpf_program__attach_cgroup(skel->progs.estab, cg_fd); - if (CHECK(IS_ERR(link), "attach_cgroup(estab)", "err: %ld\n", - PTR_ERR(link))) + if (!ASSERT_OK_PTR(link, "attach_cgroup(estab)")) return; if (sk_fds_connect(&sk_fds, false)) { @@ -431,8 +429,7 @@ static void fin(void) return; link = bpf_program__attach_cgroup(skel->progs.estab, cg_fd); - if (CHECK(IS_ERR(link), "attach_cgroup(estab)", "err: %ld\n", - PTR_ERR(link))) + if (!ASSERT_OK_PTR(link, "attach_cgroup(estab)")) return; if (sk_fds_connect(&sk_fds, false)) { @@ -471,8 +468,7 @@ static void __simple_estab(bool exprm) return; link = bpf_program__attach_cgroup(skel->progs.estab, cg_fd); - if (CHECK(IS_ERR(link), "attach_cgroup(estab)", "err: %ld\n", - PTR_ERR(link))) + if (!ASSERT_OK_PTR(link, "attach_cgroup(estab)")) return; if (sk_fds_connect(&sk_fds, false)) { @@ -509,8 +505,7 @@ static void misc(void) return; link = bpf_program__attach_cgroup(misc_skel->progs.misc_estab, cg_fd); - if (CHECK(IS_ERR(link), "attach_cgroup(misc_estab)", "err: %ld\n", - PTR_ERR(link))) + if (!ASSERT_OK_PTR(link, "attach_cgroup(misc_estab)")) return; if (sk_fds_connect(&sk_fds, false)) { diff --git a/tools/testing/selftests/bpf/prog_tests/test_overhead.c b/tools/testing/selftests/bpf/prog_tests/test_overhead.c index 9966685866fd..123c68c1917d 100644 --- a/tools/testing/selftests/bpf/prog_tests/test_overhead.c +++ b/tools/testing/selftests/bpf/prog_tests/test_overhead.c @@ -73,7 +73,7 @@ void test_test_overhead(void) return; obj = bpf_object__open_file("./test_overhead.o", NULL); - if (CHECK(IS_ERR(obj), "obj_open_file", "err %ld\n", PTR_ERR(obj))) + if (!ASSERT_OK_PTR(obj, "obj_open_file")) return; kprobe_prog = bpf_object__find_program_by_title(obj, kprobe_name); @@ -108,7 +108,7 @@ void test_test_overhead(void) /* attach kprobe */ link = bpf_program__attach_kprobe(kprobe_prog, false /* retprobe */, kprobe_func); - if (CHECK(IS_ERR(link), "attach_kprobe", "err %ld\n", PTR_ERR(link))) + if (!ASSERT_OK_PTR(link, "attach_kprobe")) goto cleanup; test_run("kprobe"); bpf_link__destroy(link); @@ -116,28 +116,28 @@ void test_test_overhead(void) /* attach kretprobe */ link = bpf_program__attach_kprobe(kretprobe_prog, true /* retprobe */, kprobe_func); - if (CHECK(IS_ERR(link), "attach kretprobe", "err %ld\n", PTR_ERR(link))) + if (!ASSERT_OK_PTR(link, "attach_kretprobe")) goto cleanup; test_run("kretprobe"); bpf_link__destroy(link); /* attach raw_tp */ link = bpf_program__attach_raw_tracepoint(raw_tp_prog, "task_rename"); - if (CHECK(IS_ERR(link), "attach fentry", "err %ld\n", PTR_ERR(link))) + if (!ASSERT_OK_PTR(link, "attach_raw_tp")) goto cleanup; test_run("raw_tp"); bpf_link__destroy(link); /* attach fentry */ link = bpf_program__attach_trace(fentry_prog); - if (CHECK(IS_ERR(link), "attach fentry", "err %ld\n", PTR_ERR(link))) + if (!ASSERT_OK_PTR(link, "attach_fentry")) goto cleanup; test_run("fentry"); bpf_link__destroy(link); /* attach fexit */ link = bpf_program__attach_trace(fexit_prog); - if (CHECK(IS_ERR(link), "attach fexit", "err %ld\n", PTR_ERR(link))) + if (!ASSERT_OK_PTR(link, "attach_fexit")) goto cleanup; test_run("fexit"); bpf_link__destroy(link); diff --git a/tools/testing/selftests/bpf/prog_tests/trace_printk.c b/tools/testing/selftests/bpf/prog_tests/trace_printk.c index 39b0decb1bb2..d39bc00feb45 100644 --- a/tools/testing/selftests/bpf/prog_tests/trace_printk.c +++ b/tools/testing/selftests/bpf/prog_tests/trace_printk.c @@ -3,7 +3,7 @@ #include <test_progs.h> -#include "trace_printk.skel.h" +#include "trace_printk.lskel.h" #define TRACEBUF "/sys/kernel/debug/tracing/trace_pipe" #define SEARCHMSG "testing,testing" @@ -21,6 +21,9 @@ void test_trace_printk(void) if (CHECK(!skel, "skel_open", "failed to open skeleton\n")) return; + ASSERT_EQ(skel->rodata->fmt[0], 'T', "invalid printk fmt string"); + skel->rodata->fmt[0] = 't'; + err = trace_printk__load(skel); if (CHECK(err, "skel_load", "failed to load skeleton: %d\n", err)) goto cleanup; diff --git a/tools/testing/selftests/bpf/prog_tests/trampoline_count.c b/tools/testing/selftests/bpf/prog_tests/trampoline_count.c index f3022d934e2d..d7f5a931d7f3 100644 --- a/tools/testing/selftests/bpf/prog_tests/trampoline_count.c +++ b/tools/testing/selftests/bpf/prog_tests/trampoline_count.c @@ -55,7 +55,7 @@ void test_trampoline_count(void) /* attach 'allowed' trampoline programs */ for (i = 0; i < MAX_TRAMP_PROGS; i++) { obj = bpf_object__open_file(object, NULL); - if (CHECK(IS_ERR(obj), "obj_open_file", "err %ld\n", PTR_ERR(obj))) { + if (!ASSERT_OK_PTR(obj, "obj_open_file")) { obj = NULL; goto cleanup; } @@ -68,14 +68,14 @@ void test_trampoline_count(void) if (rand() % 2) { link = load(inst[i].obj, fentry_name); - if (CHECK(IS_ERR(link), "attach prog", "err %ld\n", PTR_ERR(link))) { + if (!ASSERT_OK_PTR(link, "attach_prog")) { link = NULL; goto cleanup; } inst[i].link_fentry = link; } else { link = load(inst[i].obj, fexit_name); - if (CHECK(IS_ERR(link), "attach prog", "err %ld\n", PTR_ERR(link))) { + if (!ASSERT_OK_PTR(link, "attach_prog")) { link = NULL; goto cleanup; } @@ -85,7 +85,7 @@ void test_trampoline_count(void) /* and try 1 extra.. */ obj = bpf_object__open_file(object, NULL); - if (CHECK(IS_ERR(obj), "obj_open_file", "err %ld\n", PTR_ERR(obj))) { + if (!ASSERT_OK_PTR(obj, "obj_open_file")) { obj = NULL; goto cleanup; } @@ -96,13 +96,15 @@ void test_trampoline_count(void) /* ..that needs to fail */ link = load(obj, fentry_name); - if (CHECK(!IS_ERR(link), "cannot attach over the limit", "err %ld\n", PTR_ERR(link))) { + err = libbpf_get_error(link); + if (!ASSERT_ERR_PTR(link, "cannot attach over the limit")) { bpf_link__destroy(link); goto cleanup_extra; } /* with E2BIG error */ - CHECK(PTR_ERR(link) != -E2BIG, "proper error check", "err %ld\n", PTR_ERR(link)); + ASSERT_EQ(err, -E2BIG, "proper error check"); + ASSERT_EQ(link, NULL, "ptr_is_null"); /* and finaly execute the probe */ if (CHECK_FAIL(prctl(PR_GET_NAME, comm, 0L, 0L, 0L))) diff --git a/tools/testing/selftests/bpf/prog_tests/udp_limit.c b/tools/testing/selftests/bpf/prog_tests/udp_limit.c index 2aba09d4d01b..56c9d6bd38a3 100644 --- a/tools/testing/selftests/bpf/prog_tests/udp_limit.c +++ b/tools/testing/selftests/bpf/prog_tests/udp_limit.c @@ -22,11 +22,10 @@ void test_udp_limit(void) goto close_cgroup_fd; skel->links.sock = bpf_program__attach_cgroup(skel->progs.sock, cgroup_fd); + if (!ASSERT_OK_PTR(skel->links.sock, "cg_attach_sock")) + goto close_skeleton; skel->links.sock_release = bpf_program__attach_cgroup(skel->progs.sock_release, cgroup_fd); - if (CHECK(IS_ERR(skel->links.sock) || IS_ERR(skel->links.sock_release), - "cg-attach", "sock %ld sock_release %ld", - PTR_ERR(skel->links.sock), - PTR_ERR(skel->links.sock_release))) + if (!ASSERT_OK_PTR(skel->links.sock_release, "cg_attach_sock_release")) goto close_skeleton; /* BPF program enforces a single UDP socket per cgroup, diff --git a/tools/testing/selftests/bpf/prog_tests/xdp_bpf2bpf.c b/tools/testing/selftests/bpf/prog_tests/xdp_bpf2bpf.c index 2c6c570b21f8..3bd5904b4db5 100644 --- a/tools/testing/selftests/bpf/prog_tests/xdp_bpf2bpf.c +++ b/tools/testing/selftests/bpf/prog_tests/xdp_bpf2bpf.c @@ -90,7 +90,7 @@ void test_xdp_bpf2bpf(void) pb_opts.ctx = &passed; pb = perf_buffer__new(bpf_map__fd(ftrace_skel->maps.perf_buf_map), 1, &pb_opts); - if (CHECK(IS_ERR(pb), "perf_buf__new", "err %ld\n", PTR_ERR(pb))) + if (!ASSERT_OK_PTR(pb, "perf_buf__new")) goto out; /* Run test program */ diff --git a/tools/testing/selftests/bpf/prog_tests/xdp_link.c b/tools/testing/selftests/bpf/prog_tests/xdp_link.c index 6f814999b395..46eed0a33c23 100644 --- a/tools/testing/selftests/bpf/prog_tests/xdp_link.c +++ b/tools/testing/selftests/bpf/prog_tests/xdp_link.c @@ -51,7 +51,7 @@ void test_xdp_link(void) /* BPF link is not allowed to replace prog attachment */ link = bpf_program__attach_xdp(skel1->progs.xdp_handler, IFINDEX_LO); - if (CHECK(!IS_ERR(link), "link_attach_fail", "unexpected success\n")) { + if (!ASSERT_ERR_PTR(link, "link_attach_should_fail")) { bpf_link__destroy(link); /* best-effort detach prog */ opts.old_fd = prog_fd1; @@ -67,7 +67,7 @@ void test_xdp_link(void) /* now BPF link should attach successfully */ link = bpf_program__attach_xdp(skel1->progs.xdp_handler, IFINDEX_LO); - if (CHECK(IS_ERR(link), "link_attach", "failed: %ld\n", PTR_ERR(link))) + if (!ASSERT_OK_PTR(link, "link_attach")) goto cleanup; skel1->links.xdp_handler = link; @@ -95,7 +95,7 @@ void test_xdp_link(void) /* BPF link is not allowed to replace another BPF link */ link = bpf_program__attach_xdp(skel2->progs.xdp_handler, IFINDEX_LO); - if (CHECK(!IS_ERR(link), "link_attach_fail", "unexpected success\n")) { + if (!ASSERT_ERR_PTR(link, "link_attach_should_fail")) { bpf_link__destroy(link); goto cleanup; } @@ -105,7 +105,7 @@ void test_xdp_link(void) /* new link attach should succeed */ link = bpf_program__attach_xdp(skel2->progs.xdp_handler, IFINDEX_LO); - if (CHECK(IS_ERR(link), "link_attach", "failed: %ld\n", PTR_ERR(link))) + if (!ASSERT_OK_PTR(link, "link_attach")) goto cleanup; skel2->links.xdp_handler = link; diff --git a/tools/testing/selftests/bpf/progs/bpf_iter_bpf_hash_map.c b/tools/testing/selftests/bpf/progs/bpf_iter_bpf_hash_map.c index 6dfce3fd68bc..0aa3cd34cbe3 100644 --- a/tools/testing/selftests/bpf/progs/bpf_iter_bpf_hash_map.c +++ b/tools/testing/selftests/bpf/progs/bpf_iter_bpf_hash_map.c @@ -2,7 +2,6 @@ /* Copyright (c) 2020 Facebook */ #include "bpf_iter.h" #include <bpf/bpf_helpers.h> -#include <bpf/bpf_tracing.h> char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/bpf_iter_bpf_map.c b/tools/testing/selftests/bpf/progs/bpf_iter_bpf_map.c index b83b5d2e17dc..6c39e86b666f 100644 --- a/tools/testing/selftests/bpf/progs/bpf_iter_bpf_map.c +++ b/tools/testing/selftests/bpf/progs/bpf_iter_bpf_map.c @@ -2,7 +2,6 @@ /* Copyright (c) 2020 Facebook */ #include "bpf_iter.h" #include <bpf/bpf_helpers.h> -#include <bpf/bpf_tracing.h> char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/bpf_iter_ipv6_route.c b/tools/testing/selftests/bpf/progs/bpf_iter_ipv6_route.c index d58d9f1642b5..784a610ce039 100644 --- a/tools/testing/selftests/bpf/progs/bpf_iter_ipv6_route.c +++ b/tools/testing/selftests/bpf/progs/bpf_iter_ipv6_route.c @@ -3,7 +3,6 @@ #include "bpf_iter.h" #include "bpf_tracing_net.h" #include <bpf/bpf_helpers.h> -#include <bpf/bpf_tracing.h> char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/bpf_iter_netlink.c b/tools/testing/selftests/bpf/progs/bpf_iter_netlink.c index 95989f4c99b5..a28e51e2dcee 100644 --- a/tools/testing/selftests/bpf/progs/bpf_iter_netlink.c +++ b/tools/testing/selftests/bpf/progs/bpf_iter_netlink.c @@ -3,7 +3,6 @@ #include "bpf_iter.h" #include "bpf_tracing_net.h" #include <bpf/bpf_helpers.h> -#include <bpf/bpf_tracing.h> char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/bpf_iter_task.c b/tools/testing/selftests/bpf/progs/bpf_iter_task.c index b7f32c160f4e..c86b93f33b32 100644 --- a/tools/testing/selftests/bpf/progs/bpf_iter_task.c +++ b/tools/testing/selftests/bpf/progs/bpf_iter_task.c @@ -2,7 +2,6 @@ /* Copyright (c) 2020 Facebook */ #include "bpf_iter.h" #include <bpf/bpf_helpers.h> -#include <bpf/bpf_tracing.h> char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/bpf_iter_task_btf.c b/tools/testing/selftests/bpf/progs/bpf_iter_task_btf.c index a1ddc36f13ec..bca8b889cb10 100644 --- a/tools/testing/selftests/bpf/progs/bpf_iter_task_btf.c +++ b/tools/testing/selftests/bpf/progs/bpf_iter_task_btf.c @@ -2,7 +2,6 @@ /* Copyright (c) 2020, Oracle and/or its affiliates. */ #include "bpf_iter.h" #include <bpf/bpf_helpers.h> -#include <bpf/bpf_tracing.h> #include <bpf/bpf_core_read.h> #include <errno.h> diff --git a/tools/testing/selftests/bpf/progs/bpf_iter_task_file.c b/tools/testing/selftests/bpf/progs/bpf_iter_task_file.c index b2f7c7c5f952..6e7b400888fe 100644 --- a/tools/testing/selftests/bpf/progs/bpf_iter_task_file.c +++ b/tools/testing/selftests/bpf/progs/bpf_iter_task_file.c @@ -2,7 +2,6 @@ /* Copyright (c) 2020 Facebook */ #include "bpf_iter.h" #include <bpf/bpf_helpers.h> -#include <bpf/bpf_tracing.h> char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/bpf_iter_task_stack.c b/tools/testing/selftests/bpf/progs/bpf_iter_task_stack.c index 43c36f5f7649..f2b8167b72a8 100644 --- a/tools/testing/selftests/bpf/progs/bpf_iter_task_stack.c +++ b/tools/testing/selftests/bpf/progs/bpf_iter_task_stack.c @@ -2,7 +2,6 @@ /* Copyright (c) 2020 Facebook */ #include "bpf_iter.h" #include <bpf/bpf_helpers.h> -#include <bpf/bpf_tracing.h> char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/bpf_iter_task_vma.c b/tools/testing/selftests/bpf/progs/bpf_iter_task_vma.c index 11d1aa37cf11..4ea6a37d1345 100644 --- a/tools/testing/selftests/bpf/progs/bpf_iter_task_vma.c +++ b/tools/testing/selftests/bpf/progs/bpf_iter_task_vma.c @@ -2,7 +2,6 @@ /* Copyright (c) 2020 Facebook */ #include "bpf_iter.h" #include <bpf/bpf_helpers.h> -#include <bpf/bpf_tracing.h> char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/bpf_iter_tcp4.c b/tools/testing/selftests/bpf/progs/bpf_iter_tcp4.c index 54380c5e1069..2e4775c35414 100644 --- a/tools/testing/selftests/bpf/progs/bpf_iter_tcp4.c +++ b/tools/testing/selftests/bpf/progs/bpf_iter_tcp4.c @@ -3,7 +3,6 @@ #include "bpf_iter.h" #include "bpf_tracing_net.h" #include <bpf/bpf_helpers.h> -#include <bpf/bpf_tracing.h> #include <bpf/bpf_endian.h> char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/bpf_iter_tcp6.c b/tools/testing/selftests/bpf/progs/bpf_iter_tcp6.c index b4fbddfa4e10..943f7bba180e 100644 --- a/tools/testing/selftests/bpf/progs/bpf_iter_tcp6.c +++ b/tools/testing/selftests/bpf/progs/bpf_iter_tcp6.c @@ -3,7 +3,6 @@ #include "bpf_iter.h" #include "bpf_tracing_net.h" #include <bpf/bpf_helpers.h> -#include <bpf/bpf_tracing.h> #include <bpf/bpf_endian.h> char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/bpf_iter_test_kern4.c b/tools/testing/selftests/bpf/progs/bpf_iter_test_kern4.c index ee49493dc125..400fdf8d6233 100644 --- a/tools/testing/selftests/bpf/progs/bpf_iter_test_kern4.c +++ b/tools/testing/selftests/bpf/progs/bpf_iter_test_kern4.c @@ -9,8 +9,8 @@ __u32 map1_id = 0, map2_id = 0; __u32 map1_accessed = 0, map2_accessed = 0; __u64 map1_seqnum = 0, map2_seqnum1 = 0, map2_seqnum2 = 0; -static volatile const __u32 print_len; -static volatile const __u32 ret1; +volatile const __u32 print_len; +volatile const __u32 ret1; SEC("iter/bpf_map") int dump_bpf_map(struct bpf_iter__bpf_map *ctx) diff --git a/tools/testing/selftests/bpf/progs/bpf_iter_udp4.c b/tools/testing/selftests/bpf/progs/bpf_iter_udp4.c index f258583afbbd..cf0c485b1ed7 100644 --- a/tools/testing/selftests/bpf/progs/bpf_iter_udp4.c +++ b/tools/testing/selftests/bpf/progs/bpf_iter_udp4.c @@ -3,7 +3,6 @@ #include "bpf_iter.h" #include "bpf_tracing_net.h" #include <bpf/bpf_helpers.h> -#include <bpf/bpf_tracing.h> #include <bpf/bpf_endian.h> char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/bpf_iter_udp6.c b/tools/testing/selftests/bpf/progs/bpf_iter_udp6.c index 65f93bb03f0f..5031e21c433f 100644 --- a/tools/testing/selftests/bpf/progs/bpf_iter_udp6.c +++ b/tools/testing/selftests/bpf/progs/bpf_iter_udp6.c @@ -3,7 +3,6 @@ #include "bpf_iter.h" #include "bpf_tracing_net.h" #include <bpf/bpf_helpers.h> -#include <bpf/bpf_tracing.h> #include <bpf/bpf_endian.h> char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/kfree_skb.c b/tools/testing/selftests/bpf/progs/kfree_skb.c index a46a264ce24e..55e283050cab 100644 --- a/tools/testing/selftests/bpf/progs/kfree_skb.c +++ b/tools/testing/selftests/bpf/progs/kfree_skb.c @@ -109,10 +109,10 @@ int BPF_PROG(trace_kfree_skb, struct sk_buff *skb, void *location) return 0; } -static volatile struct { +struct { bool fentry_test_ok; bool fexit_test_ok; -} result; +} result = {}; SEC("fentry/eth_type_trans") int BPF_PROG(fentry_eth_type_trans, struct sk_buff *skb, struct net_device *dev, diff --git a/tools/testing/selftests/bpf/progs/linked_maps1.c b/tools/testing/selftests/bpf/progs/linked_maps1.c index 52291515cc72..00bf1ca95986 100644 --- a/tools/testing/selftests/bpf/progs/linked_maps1.c +++ b/tools/testing/selftests/bpf/progs/linked_maps1.c @@ -75,7 +75,7 @@ int BPF_PROG(handler_exit1) val = bpf_map_lookup_elem(&map_weak, &key); if (val) output_weak1 = *val; - + return 0; } diff --git a/tools/testing/selftests/bpf/progs/syscall.c b/tools/testing/selftests/bpf/progs/syscall.c new file mode 100644 index 000000000000..e550f728962d --- /dev/null +++ b/tools/testing/selftests/bpf/progs/syscall.c @@ -0,0 +1,121 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2021 Facebook */ +#include <linux/stddef.h> +#include <linux/bpf.h> +#include <bpf/bpf_helpers.h> +#include <bpf/bpf_tracing.h> +#include <../../../tools/include/linux/filter.h> +#include <linux/btf.h> + +char _license[] SEC("license") = "GPL"; + +struct args { + __u64 log_buf; + __u32 log_size; + int max_entries; + int map_fd; + int prog_fd; + int btf_fd; +}; + +#define BTF_INFO_ENC(kind, kind_flag, vlen) \ + ((!!(kind_flag) << 31) | ((kind) << 24) | ((vlen) & BTF_MAX_VLEN)) +#define BTF_TYPE_ENC(name, info, size_or_type) (name), (info), (size_or_type) +#define BTF_INT_ENC(encoding, bits_offset, nr_bits) \ + ((encoding) << 24 | (bits_offset) << 16 | (nr_bits)) +#define BTF_TYPE_INT_ENC(name, encoding, bits_offset, bits, sz) \ + BTF_TYPE_ENC(name, BTF_INFO_ENC(BTF_KIND_INT, 0, 0), sz), \ + BTF_INT_ENC(encoding, bits_offset, bits) + +static int btf_load(void) +{ + struct btf_blob { + struct btf_header btf_hdr; + __u32 types[8]; + __u32 str; + } raw_btf = { + .btf_hdr = { + .magic = BTF_MAGIC, + .version = BTF_VERSION, + .hdr_len = sizeof(struct btf_header), + .type_len = sizeof(__u32) * 8, + .str_off = sizeof(__u32) * 8, + .str_len = sizeof(__u32), + }, + .types = { + /* long */ + BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 64, 8), /* [1] */ + /* unsigned long */ + BTF_TYPE_INT_ENC(0, 0, 0, 64, 8), /* [2] */ + }, + }; + static union bpf_attr btf_load_attr = { + .btf_size = sizeof(raw_btf), + }; + + btf_load_attr.btf = (long)&raw_btf; + return bpf_sys_bpf(BPF_BTF_LOAD, &btf_load_attr, sizeof(btf_load_attr)); +} + +SEC("syscall") +int bpf_prog(struct args *ctx) +{ + static char license[] = "GPL"; + static struct bpf_insn insns[] = { + BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }; + static union bpf_attr map_create_attr = { + .map_type = BPF_MAP_TYPE_HASH, + .key_size = 8, + .value_size = 8, + .btf_key_type_id = 1, + .btf_value_type_id = 2, + }; + static union bpf_attr map_update_attr = { .map_fd = 1, }; + static __u64 key = 12; + static __u64 value = 34; + static union bpf_attr prog_load_attr = { + .prog_type = BPF_PROG_TYPE_XDP, + .insn_cnt = sizeof(insns) / sizeof(insns[0]), + }; + int ret; + + ret = btf_load(); + if (ret <= 0) + return ret; + + ctx->btf_fd = ret; + map_create_attr.max_entries = ctx->max_entries; + map_create_attr.btf_fd = ret; + + prog_load_attr.license = (long) license; + prog_load_attr.insns = (long) insns; + prog_load_attr.log_buf = ctx->log_buf; + prog_load_attr.log_size = ctx->log_size; + prog_load_attr.log_level = 1; + + ret = bpf_sys_bpf(BPF_MAP_CREATE, &map_create_attr, sizeof(map_create_attr)); + if (ret <= 0) + return ret; + ctx->map_fd = ret; + insns[3].imm = ret; + + map_update_attr.map_fd = ret; + map_update_attr.key = (long) &key; + map_update_attr.value = (long) &value; + ret = bpf_sys_bpf(BPF_MAP_UPDATE_ELEM, &map_update_attr, sizeof(map_update_attr)); + if (ret < 0) + return ret; + + ret = bpf_sys_bpf(BPF_PROG_LOAD, &prog_load_attr, sizeof(prog_load_attr)); + if (ret <= 0) + return ret; + ctx->prog_fd = ret; + return 1; +} diff --git a/tools/testing/selftests/bpf/progs/tailcall3.c b/tools/testing/selftests/bpf/progs/tailcall3.c index 739dc2a51e74..910858fe078a 100644 --- a/tools/testing/selftests/bpf/progs/tailcall3.c +++ b/tools/testing/selftests/bpf/progs/tailcall3.c @@ -10,7 +10,7 @@ struct { __uint(value_size, sizeof(__u32)); } jmp_table SEC(".maps"); -static volatile int count; +int count = 0; SEC("classifier/0") int bpf_func_0(struct __sk_buff *skb) diff --git a/tools/testing/selftests/bpf/progs/tailcall4.c b/tools/testing/selftests/bpf/progs/tailcall4.c index f82075b47d7d..bd4be135c39d 100644 --- a/tools/testing/selftests/bpf/progs/tailcall4.c +++ b/tools/testing/selftests/bpf/progs/tailcall4.c @@ -10,7 +10,7 @@ struct { __uint(value_size, sizeof(__u32)); } jmp_table SEC(".maps"); -static volatile int selector; +int selector = 0; #define TAIL_FUNC(x) \ SEC("classifier/" #x) \ diff --git a/tools/testing/selftests/bpf/progs/tailcall5.c b/tools/testing/selftests/bpf/progs/tailcall5.c index ce5450744fd4..adf30a33064e 100644 --- a/tools/testing/selftests/bpf/progs/tailcall5.c +++ b/tools/testing/selftests/bpf/progs/tailcall5.c @@ -10,7 +10,7 @@ struct { __uint(value_size, sizeof(__u32)); } jmp_table SEC(".maps"); -static volatile int selector; +int selector = 0; #define TAIL_FUNC(x) \ SEC("classifier/" #x) \ diff --git a/tools/testing/selftests/bpf/progs/tailcall_bpf2bpf2.c b/tools/testing/selftests/bpf/progs/tailcall_bpf2bpf2.c index 7b1c04183824..3cc4c12817b5 100644 --- a/tools/testing/selftests/bpf/progs/tailcall_bpf2bpf2.c +++ b/tools/testing/selftests/bpf/progs/tailcall_bpf2bpf2.c @@ -20,7 +20,7 @@ int subprog_tail(struct __sk_buff *skb) return 1; } -static volatile int count; +int count = 0; SEC("classifier/0") int bpf_func_0(struct __sk_buff *skb) diff --git a/tools/testing/selftests/bpf/progs/tailcall_bpf2bpf4.c b/tools/testing/selftests/bpf/progs/tailcall_bpf2bpf4.c index 9a1b166b7fbe..77df6d4db895 100644 --- a/tools/testing/selftests/bpf/progs/tailcall_bpf2bpf4.c +++ b/tools/testing/selftests/bpf/progs/tailcall_bpf2bpf4.c @@ -9,7 +9,7 @@ struct { __uint(value_size, sizeof(__u32)); } jmp_table SEC(".maps"); -static volatile int count; +int count = 0; __noinline int subprog_tail_2(struct __sk_buff *skb) diff --git a/tools/testing/selftests/bpf/progs/test_check_mtu.c b/tools/testing/selftests/bpf/progs/test_check_mtu.c index c4a9bae96e75..71184af57749 100644 --- a/tools/testing/selftests/bpf/progs/test_check_mtu.c +++ b/tools/testing/selftests/bpf/progs/test_check_mtu.c @@ -11,8 +11,8 @@ char _license[] SEC("license") = "GPL"; /* Userspace will update with MTU it can see on device */ -static volatile const int GLOBAL_USER_MTU; -static volatile const __u32 GLOBAL_USER_IFINDEX; +volatile const int GLOBAL_USER_MTU; +volatile const __u32 GLOBAL_USER_IFINDEX; /* BPF-prog will update these with MTU values it can see */ __u32 global_bpf_mtu_xdp = 0; diff --git a/tools/testing/selftests/bpf/progs/test_cls_redirect.c b/tools/testing/selftests/bpf/progs/test_cls_redirect.c index 3c1e042962e6..e2a5acc4785c 100644 --- a/tools/testing/selftests/bpf/progs/test_cls_redirect.c +++ b/tools/testing/selftests/bpf/progs/test_cls_redirect.c @@ -39,8 +39,8 @@ char _license[] SEC("license") = "Dual BSD/GPL"; /** * Destination port and IP used for UDP encapsulation. */ -static volatile const __be16 ENCAPSULATION_PORT; -static volatile const __be32 ENCAPSULATION_IP; +volatile const __be16 ENCAPSULATION_PORT; +volatile const __be32 ENCAPSULATION_IP; typedef struct { uint64_t processed_packets_total; diff --git a/tools/testing/selftests/bpf/progs/test_global_func_args.c b/tools/testing/selftests/bpf/progs/test_global_func_args.c index cae309538a9e..e712bf77daae 100644 --- a/tools/testing/selftests/bpf/progs/test_global_func_args.c +++ b/tools/testing/selftests/bpf/progs/test_global_func_args.c @@ -8,7 +8,7 @@ struct S { int v; }; -static volatile struct S global_variable; +struct S global_variable = {}; struct { __uint(type, BPF_MAP_TYPE_ARRAY); diff --git a/tools/testing/selftests/bpf/progs/test_lookup_and_delete.c b/tools/testing/selftests/bpf/progs/test_lookup_and_delete.c new file mode 100644 index 000000000000..3a193f42c7e7 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/test_lookup_and_delete.c @@ -0,0 +1,26 @@ +// SPDX-License-Identifier: GPL-2.0 + +#include "vmlinux.h" +#include <bpf/bpf_helpers.h> + +__u32 set_pid = 0; +__u64 set_key = 0; +__u64 set_value = 0; + +struct { + __uint(type, BPF_MAP_TYPE_HASH); + __uint(max_entries, 2); + __type(key, __u64); + __type(value, __u64); +} hash_map SEC(".maps"); + +SEC("tp/syscalls/sys_enter_getpgid") +int bpf_lookup_and_delete_test(const void *ctx) +{ + if (set_pid == bpf_get_current_pid_tgid() >> 32) + bpf_map_update_elem(&hash_map, &set_key, &set_value, BPF_NOEXIST); + + return 0; +} + +char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/test_migrate_reuseport.c b/tools/testing/selftests/bpf/progs/test_migrate_reuseport.c new file mode 100644 index 000000000000..27df571abf5b --- /dev/null +++ b/tools/testing/selftests/bpf/progs/test_migrate_reuseport.c @@ -0,0 +1,135 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Check if we can migrate child sockets. + * + * 1. If reuse_md->migrating_sk is NULL (SYN packet), + * return SK_PASS without selecting a listener. + * 2. If reuse_md->migrating_sk is not NULL (socket migration), + * select a listener (reuseport_map[migrate_map[cookie]]) + * + * Author: Kuniyuki Iwashima <kuniyu@amazon.co.jp> + */ + +#include <stddef.h> +#include <string.h> +#include <linux/bpf.h> +#include <linux/if_ether.h> +#include <linux/ip.h> +#include <linux/ipv6.h> +#include <linux/tcp.h> +#include <linux/in.h> +#include <bpf/bpf_endian.h> +#include <bpf/bpf_helpers.h> + +struct { + __uint(type, BPF_MAP_TYPE_REUSEPORT_SOCKARRAY); + __uint(max_entries, 256); + __type(key, int); + __type(value, __u64); +} reuseport_map SEC(".maps"); + +struct { + __uint(type, BPF_MAP_TYPE_HASH); + __uint(max_entries, 256); + __type(key, __u64); + __type(value, int); +} migrate_map SEC(".maps"); + +int migrated_at_close = 0; +int migrated_at_close_fastopen = 0; +int migrated_at_send_synack = 0; +int migrated_at_recv_ack = 0; +__be16 server_port; + +SEC("xdp") +int drop_ack(struct xdp_md *xdp) +{ + void *data_end = (void *)(long)xdp->data_end; + void *data = (void *)(long)xdp->data; + struct ethhdr *eth = data; + struct tcphdr *tcp = NULL; + + if (eth + 1 > data_end) + goto pass; + + switch (bpf_ntohs(eth->h_proto)) { + case ETH_P_IP: { + struct iphdr *ip = (struct iphdr *)(eth + 1); + + if (ip + 1 > data_end) + goto pass; + + if (ip->protocol != IPPROTO_TCP) + goto pass; + + tcp = (struct tcphdr *)((void *)ip + ip->ihl * 4); + break; + } + case ETH_P_IPV6: { + struct ipv6hdr *ipv6 = (struct ipv6hdr *)(eth + 1); + + if (ipv6 + 1 > data_end) + goto pass; + + if (ipv6->nexthdr != IPPROTO_TCP) + goto pass; + + tcp = (struct tcphdr *)(ipv6 + 1); + break; + } + default: + goto pass; + } + + if (tcp + 1 > data_end) + goto pass; + + if (tcp->dest != server_port) + goto pass; + + if (!tcp->syn && tcp->ack) + return XDP_DROP; + +pass: + return XDP_PASS; +} + +SEC("sk_reuseport/migrate") +int migrate_reuseport(struct sk_reuseport_md *reuse_md) +{ + int *key, flags = 0, state, err; + __u64 cookie; + + if (!reuse_md->migrating_sk) + return SK_PASS; + + state = reuse_md->migrating_sk->state; + cookie = bpf_get_socket_cookie(reuse_md->sk); + + key = bpf_map_lookup_elem(&migrate_map, &cookie); + if (!key) + return SK_DROP; + + err = bpf_sk_select_reuseport(reuse_md, &reuseport_map, key, flags); + if (err) + return SK_PASS; + + switch (state) { + case BPF_TCP_ESTABLISHED: + __sync_fetch_and_add(&migrated_at_close, 1); + break; + case BPF_TCP_SYN_RECV: + __sync_fetch_and_add(&migrated_at_close_fastopen, 1); + break; + case BPF_TCP_NEW_SYN_RECV: + if (!reuse_md->len) + __sync_fetch_and_add(&migrated_at_send_synack, 1); + else + __sync_fetch_and_add(&migrated_at_recv_ack, 1); + break; + } + + return SK_PASS; +} + +char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/test_rdonly_maps.c b/tools/testing/selftests/bpf/progs/test_rdonly_maps.c index ecbeea2df259..fc8e8a34a3db 100644 --- a/tools/testing/selftests/bpf/progs/test_rdonly_maps.c +++ b/tools/testing/selftests/bpf/progs/test_rdonly_maps.c @@ -5,7 +5,7 @@ #include <linux/bpf.h> #include <bpf/bpf_helpers.h> -static volatile const struct { +const struct { unsigned a[4]; /* * if the struct's size is multiple of 16, compiler will put it into @@ -15,11 +15,11 @@ static volatile const struct { char _y; } rdonly_values = { .a = {2, 3, 4, 5} }; -static volatile struct { +struct { unsigned did_run; unsigned iters; unsigned sum; -} res; +} res = {}; SEC("raw_tracepoint/sys_enter:skip_loop") int skip_loop(struct pt_regs *ctx) diff --git a/tools/testing/selftests/bpf/progs/test_ringbuf.c b/tools/testing/selftests/bpf/progs/test_ringbuf.c index 6b3f288b7c63..eaa7d9dba0be 100644 --- a/tools/testing/selftests/bpf/progs/test_ringbuf.c +++ b/tools/testing/selftests/bpf/progs/test_ringbuf.c @@ -35,7 +35,7 @@ long prod_pos = 0; /* inner state */ long seq = 0; -SEC("tp/syscalls/sys_enter_getpgid") +SEC("fentry/__x64_sys_getpgid") int test_ringbuf(void *ctx) { int cur_pid = bpf_get_current_pid_tgid() >> 32; @@ -48,7 +48,7 @@ int test_ringbuf(void *ctx) sample = bpf_ringbuf_reserve(&ringbuf, sizeof(*sample), 0); if (!sample) { __sync_fetch_and_add(&dropped, 1); - return 1; + return 0; } sample->pid = pid; diff --git a/tools/testing/selftests/bpf/progs/test_skeleton.c b/tools/testing/selftests/bpf/progs/test_skeleton.c index 374ccef704e1..441fa1c552c8 100644 --- a/tools/testing/selftests/bpf/progs/test_skeleton.c +++ b/tools/testing/selftests/bpf/progs/test_skeleton.c @@ -38,11 +38,11 @@ extern int LINUX_KERNEL_VERSION __kconfig; bool bpf_syscall = 0; int kern_ver = 0; +struct s out5 = {}; + SEC("raw_tp/sys_enter") int handler(const void *ctx) { - static volatile struct s out5; - out1 = in1; out2 = in2; out3 = in3; diff --git a/tools/testing/selftests/bpf/progs/test_snprintf.c b/tools/testing/selftests/bpf/progs/test_snprintf.c index e35129bea0a0..e2ad26150f9b 100644 --- a/tools/testing/selftests/bpf/progs/test_snprintf.c +++ b/tools/testing/selftests/bpf/progs/test_snprintf.c @@ -3,7 +3,6 @@ #include <linux/bpf.h> #include <bpf/bpf_helpers.h> -#include <bpf/bpf_tracing.h> __u32 pid = 0; diff --git a/tools/testing/selftests/bpf/progs/test_snprintf_single.c b/tools/testing/selftests/bpf/progs/test_snprintf_single.c index 402adaf344f9..3095837334d3 100644 --- a/tools/testing/selftests/bpf/progs/test_snprintf_single.c +++ b/tools/testing/selftests/bpf/progs/test_snprintf_single.c @@ -5,7 +5,7 @@ #include <bpf/bpf_helpers.h> /* The format string is filled from the userspace such that loading fails */ -static const char fmt[10]; +const char fmt[10]; SEC("raw_tp/sys_enter") int handler(const void *ctx) diff --git a/tools/testing/selftests/bpf/progs/test_sockmap_listen.c b/tools/testing/selftests/bpf/progs/test_sockmap_listen.c index a39eba9f5201..a1cc58b10c7c 100644 --- a/tools/testing/selftests/bpf/progs/test_sockmap_listen.c +++ b/tools/testing/selftests/bpf/progs/test_sockmap_listen.c @@ -28,8 +28,8 @@ struct { __type(value, unsigned int); } verdict_map SEC(".maps"); -static volatile bool test_sockmap; /* toggled by user-space */ -static volatile bool test_ingress; /* toggled by user-space */ +bool test_sockmap = false; /* toggled by user-space */ +bool test_ingress = false; /* toggled by user-space */ SEC("sk_skb/stream_parser") int prog_stream_parser(struct __sk_buff *skb) diff --git a/tools/testing/selftests/bpf/progs/test_static_linked1.c b/tools/testing/selftests/bpf/progs/test_static_linked1.c index ea1a6c4c7172..4f0b612e1661 100644 --- a/tools/testing/selftests/bpf/progs/test_static_linked1.c +++ b/tools/testing/selftests/bpf/progs/test_static_linked1.c @@ -4,10 +4,10 @@ #include <linux/bpf.h> #include <bpf/bpf_helpers.h> -/* 8-byte aligned .bss */ -static volatile long static_var1; -static volatile int static_var11; -int var1 = 0; +/* 8-byte aligned .data */ +static volatile long static_var1 = 2; +static volatile int static_var2 = 3; +int var1 = -1; /* 4-byte aligned .rodata */ const volatile int rovar1; @@ -21,7 +21,7 @@ static __noinline int subprog(int x) SEC("raw_tp/sys_enter") int handler1(const void *ctx) { - var1 = subprog(rovar1) + static_var1 + static_var11; + var1 = subprog(rovar1) + static_var1 + static_var2; return 0; } diff --git a/tools/testing/selftests/bpf/progs/test_static_linked2.c b/tools/testing/selftests/bpf/progs/test_static_linked2.c index 54d8d1ab577c..766ebd502a60 100644 --- a/tools/testing/selftests/bpf/progs/test_static_linked2.c +++ b/tools/testing/selftests/bpf/progs/test_static_linked2.c @@ -4,10 +4,10 @@ #include <linux/bpf.h> #include <bpf/bpf_helpers.h> -/* 4-byte aligned .bss */ -static volatile int static_var2; -static volatile int static_var22; -int var2 = 0; +/* 4-byte aligned .data */ +static volatile int static_var1 = 5; +static volatile int static_var2 = 6; +int var2 = -1; /* 8-byte aligned .rodata */ const volatile long rovar2; @@ -21,7 +21,7 @@ static __noinline int subprog(int x) SEC("raw_tp/sys_enter") int handler2(const void *ctx) { - var2 = subprog(rovar2) + static_var2 + static_var22; + var2 = subprog(rovar2) + static_var1 + static_var2; return 0; } diff --git a/tools/testing/selftests/bpf/progs/test_subprogs.c b/tools/testing/selftests/bpf/progs/test_subprogs.c index d3c5673c0218..b7c37ca09544 100644 --- a/tools/testing/selftests/bpf/progs/test_subprogs.c +++ b/tools/testing/selftests/bpf/progs/test_subprogs.c @@ -4,8 +4,18 @@ const char LICENSE[] SEC("license") = "GPL"; +struct { + __uint(type, BPF_MAP_TYPE_ARRAY); + __uint(max_entries, 1); + __type(key, __u32); + __type(value, __u64); +} array SEC(".maps"); + __noinline int sub1(int x) { + int key = 0; + + bpf_map_lookup_elem(&array, &key); return x + 1; } @@ -23,6 +33,9 @@ static __noinline int sub3(int z) static __noinline int sub4(int w) { + int key = 0; + + bpf_map_lookup_elem(&array, &key); return w + sub3(5) + sub1(6); } diff --git a/tools/testing/selftests/bpf/progs/test_tc_bpf.c b/tools/testing/selftests/bpf/progs/test_tc_bpf.c new file mode 100644 index 000000000000..18a3a7ed924a --- /dev/null +++ b/tools/testing/selftests/bpf/progs/test_tc_bpf.c @@ -0,0 +1,12 @@ +// SPDX-License-Identifier: GPL-2.0 + +#include <linux/bpf.h> +#include <bpf/bpf_helpers.h> + +/* Dummy prog to test TC-BPF API */ + +SEC("classifier") +int cls(struct __sk_buff *skb) +{ + return 0; +} diff --git a/tools/testing/selftests/bpf/progs/test_tc_neigh.c b/tools/testing/selftests/bpf/progs/test_tc_neigh.c index b985ac4e7a81..0c93d326a663 100644 --- a/tools/testing/selftests/bpf/progs/test_tc_neigh.c +++ b/tools/testing/selftests/bpf/progs/test_tc_neigh.c @@ -33,17 +33,8 @@ a.s6_addr32[3] == b.s6_addr32[3]) #endif -enum { - dev_src, - dev_dst, -}; - -struct bpf_map_def SEC("maps") ifindex_map = { - .type = BPF_MAP_TYPE_ARRAY, - .key_size = sizeof(int), - .value_size = sizeof(int), - .max_entries = 2, -}; +volatile const __u32 IFINDEX_SRC; +volatile const __u32 IFINDEX_DST; static __always_inline bool is_remote_ep_v4(struct __sk_buff *skb, __be32 addr) @@ -79,14 +70,8 @@ static __always_inline bool is_remote_ep_v6(struct __sk_buff *skb, return v6_equal(ip6h->daddr, addr); } -static __always_inline int get_dev_ifindex(int which) -{ - int *ifindex = bpf_map_lookup_elem(&ifindex_map, &which); - - return ifindex ? *ifindex : 0; -} - -SEC("chk_egress") int tc_chk(struct __sk_buff *skb) +SEC("classifier/chk_egress") +int tc_chk(struct __sk_buff *skb) { void *data_end = ctx_ptr(skb->data_end); void *data = ctx_ptr(skb->data); @@ -98,7 +83,8 @@ SEC("chk_egress") int tc_chk(struct __sk_buff *skb) return !raw[0] && !raw[1] && !raw[2] ? TC_ACT_SHOT : TC_ACT_OK; } -SEC("dst_ingress") int tc_dst(struct __sk_buff *skb) +SEC("classifier/dst_ingress") +int tc_dst(struct __sk_buff *skb) { __u8 zero[ETH_ALEN * 2]; bool redirect = false; @@ -119,10 +105,11 @@ SEC("dst_ingress") int tc_dst(struct __sk_buff *skb) if (bpf_skb_store_bytes(skb, 0, &zero, sizeof(zero), 0) < 0) return TC_ACT_SHOT; - return bpf_redirect_neigh(get_dev_ifindex(dev_src), NULL, 0, 0); + return bpf_redirect_neigh(IFINDEX_SRC, NULL, 0, 0); } -SEC("src_ingress") int tc_src(struct __sk_buff *skb) +SEC("classifier/src_ingress") +int tc_src(struct __sk_buff *skb) { __u8 zero[ETH_ALEN * 2]; bool redirect = false; @@ -143,7 +130,7 @@ SEC("src_ingress") int tc_src(struct __sk_buff *skb) if (bpf_skb_store_bytes(skb, 0, &zero, sizeof(zero), 0) < 0) return TC_ACT_SHOT; - return bpf_redirect_neigh(get_dev_ifindex(dev_dst), NULL, 0, 0); + return bpf_redirect_neigh(IFINDEX_DST, NULL, 0, 0); } char __license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/test_tc_neigh_fib.c b/tools/testing/selftests/bpf/progs/test_tc_neigh_fib.c index d82ed3457030..f7ab69cf018e 100644 --- a/tools/testing/selftests/bpf/progs/test_tc_neigh_fib.c +++ b/tools/testing/selftests/bpf/progs/test_tc_neigh_fib.c @@ -75,7 +75,8 @@ static __always_inline int fill_fib_params_v6(struct __sk_buff *skb, return 0; } -SEC("chk_egress") int tc_chk(struct __sk_buff *skb) +SEC("classifier/chk_egress") +int tc_chk(struct __sk_buff *skb) { void *data_end = ctx_ptr(skb->data_end); void *data = ctx_ptr(skb->data); @@ -142,12 +143,14 @@ static __always_inline int tc_redir(struct __sk_buff *skb) /* these are identical, but keep them separate for compatibility with the * section names expected by test_tc_redirect.sh */ -SEC("dst_ingress") int tc_dst(struct __sk_buff *skb) +SEC("classifier/dst_ingress") +int tc_dst(struct __sk_buff *skb) { return tc_redir(skb); } -SEC("src_ingress") int tc_src(struct __sk_buff *skb) +SEC("classifier/src_ingress") +int tc_src(struct __sk_buff *skb) { return tc_redir(skb); } diff --git a/tools/testing/selftests/bpf/progs/test_tc_peer.c b/tools/testing/selftests/bpf/progs/test_tc_peer.c index fc84a7685aa2..fe818cd5f010 100644 --- a/tools/testing/selftests/bpf/progs/test_tc_peer.c +++ b/tools/testing/selftests/bpf/progs/test_tc_peer.c @@ -5,41 +5,59 @@ #include <linux/bpf.h> #include <linux/stddef.h> #include <linux/pkt_cls.h> +#include <linux/if_ether.h> +#include <linux/ip.h> #include <bpf/bpf_helpers.h> -enum { - dev_src, - dev_dst, -}; +volatile const __u32 IFINDEX_SRC; +volatile const __u32 IFINDEX_DST; -struct bpf_map_def SEC("maps") ifindex_map = { - .type = BPF_MAP_TYPE_ARRAY, - .key_size = sizeof(int), - .value_size = sizeof(int), - .max_entries = 2, -}; +static const __u8 src_mac[] = {0x00, 0x11, 0x22, 0x33, 0x44, 0x55}; +static const __u8 dst_mac[] = {0x00, 0x22, 0x33, 0x44, 0x55, 0x66}; -static __always_inline int get_dev_ifindex(int which) +SEC("classifier/chk_egress") +int tc_chk(struct __sk_buff *skb) { - int *ifindex = bpf_map_lookup_elem(&ifindex_map, &which); + return TC_ACT_SHOT; +} - return ifindex ? *ifindex : 0; +SEC("classifier/dst_ingress") +int tc_dst(struct __sk_buff *skb) +{ + return bpf_redirect_peer(IFINDEX_SRC, 0); } -SEC("chk_egress") int tc_chk(struct __sk_buff *skb) +SEC("classifier/src_ingress") +int tc_src(struct __sk_buff *skb) { - return TC_ACT_SHOT; + return bpf_redirect_peer(IFINDEX_DST, 0); } -SEC("dst_ingress") int tc_dst(struct __sk_buff *skb) +SEC("classifier/dst_ingress_l3") +int tc_dst_l3(struct __sk_buff *skb) { - return bpf_redirect_peer(get_dev_ifindex(dev_src), 0); + return bpf_redirect(IFINDEX_SRC, 0); } -SEC("src_ingress") int tc_src(struct __sk_buff *skb) +SEC("classifier/src_ingress_l3") +int tc_src_l3(struct __sk_buff *skb) { - return bpf_redirect_peer(get_dev_ifindex(dev_dst), 0); + __u16 proto = skb->protocol; + + if (bpf_skb_change_head(skb, ETH_HLEN, 0) != 0) + return TC_ACT_SHOT; + + if (bpf_skb_store_bytes(skb, 0, &src_mac, ETH_ALEN, 0) != 0) + return TC_ACT_SHOT; + + if (bpf_skb_store_bytes(skb, ETH_ALEN, &dst_mac, ETH_ALEN, 0) != 0) + return TC_ACT_SHOT; + + if (bpf_skb_store_bytes(skb, ETH_ALEN + ETH_ALEN, &proto, sizeof(__u16), 0) != 0) + return TC_ACT_SHOT; + + return bpf_redirect_peer(IFINDEX_DST, 0); } char __license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/trace_printk.c b/tools/testing/selftests/bpf/progs/trace_printk.c index 8ca7f399b670..119582aa105a 100644 --- a/tools/testing/selftests/bpf/progs/trace_printk.c +++ b/tools/testing/selftests/bpf/progs/trace_printk.c @@ -10,11 +10,11 @@ char _license[] SEC("license") = "GPL"; int trace_printk_ret = 0; int trace_printk_ran = 0; -SEC("tp/raw_syscalls/sys_enter") +const char fmt[] = "Testing,testing %d\n"; + +SEC("fentry/__x64_sys_nanosleep") int sys_enter(void *ctx) { - static const char fmt[] = "testing,testing %d\n"; - trace_printk_ret = bpf_trace_printk(fmt, sizeof(fmt), ++trace_printk_ran); return 0; diff --git a/tools/testing/selftests/bpf/progs/xdp_redirect_multi_kern.c b/tools/testing/selftests/bpf/progs/xdp_redirect_multi_kern.c new file mode 100644 index 000000000000..880debcbcd65 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/xdp_redirect_multi_kern.c @@ -0,0 +1,94 @@ +// SPDX-License-Identifier: GPL-2.0 +#define KBUILD_MODNAME "foo" +#include <string.h> +#include <linux/in.h> +#include <linux/if_ether.h> +#include <linux/if_packet.h> +#include <linux/ip.h> +#include <linux/ipv6.h> + +#include <linux/bpf.h> +#include <bpf/bpf_helpers.h> +#include <bpf/bpf_endian.h> + +/* One map use devmap, another one use devmap_hash for testing */ +struct { + __uint(type, BPF_MAP_TYPE_DEVMAP); + __uint(key_size, sizeof(int)); + __uint(value_size, sizeof(int)); + __uint(max_entries, 1024); +} map_all SEC(".maps"); + +struct { + __uint(type, BPF_MAP_TYPE_DEVMAP_HASH); + __uint(key_size, sizeof(int)); + __uint(value_size, sizeof(struct bpf_devmap_val)); + __uint(max_entries, 128); +} map_egress SEC(".maps"); + +/* map to store egress interfaces mac addresses */ +struct { + __uint(type, BPF_MAP_TYPE_HASH); + __type(key, __u32); + __type(value, __be64); + __uint(max_entries, 128); +} mac_map SEC(".maps"); + +SEC("xdp_redirect_map_multi") +int xdp_redirect_map_multi_prog(struct xdp_md *ctx) +{ + void *data_end = (void *)(long)ctx->data_end; + void *data = (void *)(long)ctx->data; + int if_index = ctx->ingress_ifindex; + struct ethhdr *eth = data; + __u16 h_proto; + __u64 nh_off; + + nh_off = sizeof(*eth); + if (data + nh_off > data_end) + return XDP_DROP; + + h_proto = eth->h_proto; + + /* Using IPv4 for (BPF_F_BROADCAST | BPF_F_EXCLUDE_INGRESS) testing */ + if (h_proto == bpf_htons(ETH_P_IP)) + return bpf_redirect_map(&map_all, 0, + BPF_F_BROADCAST | BPF_F_EXCLUDE_INGRESS); + /* Using IPv6 for none flag testing */ + else if (h_proto == bpf_htons(ETH_P_IPV6)) + return bpf_redirect_map(&map_all, if_index, 0); + /* All others for BPF_F_BROADCAST testing */ + else + return bpf_redirect_map(&map_all, 0, BPF_F_BROADCAST); +} + +/* The following 2 progs are for 2nd devmap prog testing */ +SEC("xdp_redirect_map_ingress") +int xdp_redirect_map_all_prog(struct xdp_md *ctx) +{ + return bpf_redirect_map(&map_egress, 0, + BPF_F_BROADCAST | BPF_F_EXCLUDE_INGRESS); +} + +SEC("xdp_devmap/map_prog") +int xdp_devmap_prog(struct xdp_md *ctx) +{ + void *data_end = (void *)(long)ctx->data_end; + void *data = (void *)(long)ctx->data; + __u32 key = ctx->egress_ifindex; + struct ethhdr *eth = data; + __u64 nh_off; + __be64 *mac; + + nh_off = sizeof(*eth); + if (data + nh_off > data_end) + return XDP_DROP; + + mac = bpf_map_lookup_elem(&mac_map, &key); + if (mac) + __builtin_memcpy(eth->h_source, mac, ETH_ALEN); + + return XDP_PASS; +} + +char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/test_doc_build.sh b/tools/testing/selftests/bpf/test_doc_build.sh index 7eb940a7b2eb..ed12111cd2f0 100755 --- a/tools/testing/selftests/bpf/test_doc_build.sh +++ b/tools/testing/selftests/bpf/test_doc_build.sh @@ -1,5 +1,6 @@ #!/bin/bash # SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) +set -e # Assume script is located under tools/testing/selftests/bpf/. We want to start # build attempts from the top of kernel repository. diff --git a/tools/testing/selftests/bpf/test_lru_map.c b/tools/testing/selftests/bpf/test_lru_map.c index 6a5349f9eb14..7e9049fa3edf 100644 --- a/tools/testing/selftests/bpf/test_lru_map.c +++ b/tools/testing/selftests/bpf/test_lru_map.c @@ -231,6 +231,14 @@ static void test_lru_sanity0(int map_type, int map_flags) assert(bpf_map_lookup_elem(lru_map_fd, &key, value) == -1 && errno == ENOENT); + /* lookup elem key=1 and delete it, then check it doesn't exist */ + key = 1; + assert(!bpf_map_lookup_and_delete_elem(lru_map_fd, &key, &value)); + assert(value[0] == 1234); + + /* remove the same element from the expected map */ + assert(!bpf_map_delete_elem(expected_map_fd, &key)); + assert(map_equal(lru_map_fd, expected_map_fd)); close(expected_map_fd); diff --git a/tools/testing/selftests/bpf/test_maps.c b/tools/testing/selftests/bpf/test_maps.c index 51adc42b2b40..30cbf5d98f7d 100644 --- a/tools/testing/selftests/bpf/test_maps.c +++ b/tools/testing/selftests/bpf/test_maps.c @@ -53,23 +53,30 @@ static void test_hashmap(unsigned int task, void *data) value = 0; /* BPF_NOEXIST means add new element if it doesn't exist. */ - assert(bpf_map_update_elem(fd, &key, &value, BPF_NOEXIST) == -1 && + assert(bpf_map_update_elem(fd, &key, &value, BPF_NOEXIST) < 0 && /* key=1 already exists. */ errno == EEXIST); /* -1 is an invalid flag. */ - assert(bpf_map_update_elem(fd, &key, &value, -1) == -1 && + assert(bpf_map_update_elem(fd, &key, &value, -1) < 0 && errno == EINVAL); /* Check that key=1 can be found. */ assert(bpf_map_lookup_elem(fd, &key, &value) == 0 && value == 1234); key = 2; + value = 1234; + /* Insert key=2 element. */ + assert(bpf_map_update_elem(fd, &key, &value, BPF_ANY) == 0); + + /* Check that key=2 matches the value and delete it */ + assert(bpf_map_lookup_and_delete_elem(fd, &key, &value) == 0 && value == 1234); + /* Check that key=2 is not found. */ - assert(bpf_map_lookup_elem(fd, &key, &value) == -1 && errno == ENOENT); + assert(bpf_map_lookup_elem(fd, &key, &value) < 0 && errno == ENOENT); /* BPF_EXIST means update existing element. */ - assert(bpf_map_update_elem(fd, &key, &value, BPF_EXIST) == -1 && + assert(bpf_map_update_elem(fd, &key, &value, BPF_EXIST) < 0 && /* key=2 is not there. */ errno == ENOENT); @@ -80,7 +87,7 @@ static void test_hashmap(unsigned int task, void *data) * inserted due to max_entries limit. */ key = 0; - assert(bpf_map_update_elem(fd, &key, &value, BPF_NOEXIST) == -1 && + assert(bpf_map_update_elem(fd, &key, &value, BPF_NOEXIST) < 0 && errno == E2BIG); /* Update existing element, though the map is full. */ @@ -89,12 +96,12 @@ static void test_hashmap(unsigned int task, void *data) key = 2; assert(bpf_map_update_elem(fd, &key, &value, BPF_ANY) == 0); key = 3; - assert(bpf_map_update_elem(fd, &key, &value, BPF_NOEXIST) == -1 && + assert(bpf_map_update_elem(fd, &key, &value, BPF_NOEXIST) < 0 && errno == E2BIG); /* Check that key = 0 doesn't exist. */ key = 0; - assert(bpf_map_delete_elem(fd, &key) == -1 && errno == ENOENT); + assert(bpf_map_delete_elem(fd, &key) < 0 && errno == ENOENT); /* Iterate over two elements. */ assert(bpf_map_get_next_key(fd, NULL, &first_key) == 0 && @@ -104,7 +111,7 @@ static void test_hashmap(unsigned int task, void *data) assert(bpf_map_get_next_key(fd, &next_key, &next_key) == 0 && (next_key == 1 || next_key == 2) && (next_key != first_key)); - assert(bpf_map_get_next_key(fd, &next_key, &next_key) == -1 && + assert(bpf_map_get_next_key(fd, &next_key, &next_key) < 0 && errno == ENOENT); /* Delete both elements. */ @@ -112,13 +119,13 @@ static void test_hashmap(unsigned int task, void *data) assert(bpf_map_delete_elem(fd, &key) == 0); key = 2; assert(bpf_map_delete_elem(fd, &key) == 0); - assert(bpf_map_delete_elem(fd, &key) == -1 && errno == ENOENT); + assert(bpf_map_delete_elem(fd, &key) < 0 && errno == ENOENT); key = 0; /* Check that map is empty. */ - assert(bpf_map_get_next_key(fd, NULL, &next_key) == -1 && + assert(bpf_map_get_next_key(fd, NULL, &next_key) < 0 && errno == ENOENT); - assert(bpf_map_get_next_key(fd, &key, &next_key) == -1 && + assert(bpf_map_get_next_key(fd, &key, &next_key) < 0 && errno == ENOENT); close(fd); @@ -166,15 +173,25 @@ static void test_hashmap_percpu(unsigned int task, void *data) /* Insert key=1 element. */ assert(!(expected_key_mask & key)); assert(bpf_map_update_elem(fd, &key, value, BPF_ANY) == 0); + + /* Lookup and delete elem key=1 and check value. */ + assert(bpf_map_lookup_and_delete_elem(fd, &key, value) == 0 && + bpf_percpu(value,0) == 100); + + for (i = 0; i < nr_cpus; i++) + bpf_percpu(value,i) = i + 100; + + /* Insert key=1 element which should not exist. */ + assert(bpf_map_update_elem(fd, &key, value, BPF_NOEXIST) == 0); expected_key_mask |= key; /* BPF_NOEXIST means add new element if it doesn't exist. */ - assert(bpf_map_update_elem(fd, &key, value, BPF_NOEXIST) == -1 && + assert(bpf_map_update_elem(fd, &key, value, BPF_NOEXIST) < 0 && /* key=1 already exists. */ errno == EEXIST); /* -1 is an invalid flag. */ - assert(bpf_map_update_elem(fd, &key, value, -1) == -1 && + assert(bpf_map_update_elem(fd, &key, value, -1) < 0 && errno == EINVAL); /* Check that key=1 can be found. Value could be 0 if the lookup @@ -186,10 +203,10 @@ static void test_hashmap_percpu(unsigned int task, void *data) key = 2; /* Check that key=2 is not found. */ - assert(bpf_map_lookup_elem(fd, &key, value) == -1 && errno == ENOENT); + assert(bpf_map_lookup_elem(fd, &key, value) < 0 && errno == ENOENT); /* BPF_EXIST means update existing element. */ - assert(bpf_map_update_elem(fd, &key, value, BPF_EXIST) == -1 && + assert(bpf_map_update_elem(fd, &key, value, BPF_EXIST) < 0 && /* key=2 is not there. */ errno == ENOENT); @@ -202,11 +219,11 @@ static void test_hashmap_percpu(unsigned int task, void *data) * inserted due to max_entries limit. */ key = 0; - assert(bpf_map_update_elem(fd, &key, value, BPF_NOEXIST) == -1 && + assert(bpf_map_update_elem(fd, &key, value, BPF_NOEXIST) < 0 && errno == E2BIG); /* Check that key = 0 doesn't exist. */ - assert(bpf_map_delete_elem(fd, &key) == -1 && errno == ENOENT); + assert(bpf_map_delete_elem(fd, &key) < 0 && errno == ENOENT); /* Iterate over two elements. */ assert(bpf_map_get_next_key(fd, NULL, &first_key) == 0 && @@ -237,13 +254,13 @@ static void test_hashmap_percpu(unsigned int task, void *data) assert(bpf_map_delete_elem(fd, &key) == 0); key = 2; assert(bpf_map_delete_elem(fd, &key) == 0); - assert(bpf_map_delete_elem(fd, &key) == -1 && errno == ENOENT); + assert(bpf_map_delete_elem(fd, &key) < 0 && errno == ENOENT); key = 0; /* Check that map is empty. */ - assert(bpf_map_get_next_key(fd, NULL, &next_key) == -1 && + assert(bpf_map_get_next_key(fd, NULL, &next_key) < 0 && errno == ENOENT); - assert(bpf_map_get_next_key(fd, &key, &next_key) == -1 && + assert(bpf_map_get_next_key(fd, &key, &next_key) < 0 && errno == ENOENT); close(fd); @@ -360,7 +377,7 @@ static void test_arraymap(unsigned int task, void *data) assert(bpf_map_update_elem(fd, &key, &value, BPF_ANY) == 0); value = 0; - assert(bpf_map_update_elem(fd, &key, &value, BPF_NOEXIST) == -1 && + assert(bpf_map_update_elem(fd, &key, &value, BPF_NOEXIST) < 0 && errno == EEXIST); /* Check that key=1 can be found. */ @@ -374,11 +391,11 @@ static void test_arraymap(unsigned int task, void *data) * due to max_entries limit. */ key = 2; - assert(bpf_map_update_elem(fd, &key, &value, BPF_EXIST) == -1 && + assert(bpf_map_update_elem(fd, &key, &value, BPF_EXIST) < 0 && errno == E2BIG); /* Check that key = 2 doesn't exist. */ - assert(bpf_map_lookup_elem(fd, &key, &value) == -1 && errno == ENOENT); + assert(bpf_map_lookup_elem(fd, &key, &value) < 0 && errno == ENOENT); /* Iterate over two elements. */ assert(bpf_map_get_next_key(fd, NULL, &next_key) == 0 && @@ -387,12 +404,12 @@ static void test_arraymap(unsigned int task, void *data) next_key == 0); assert(bpf_map_get_next_key(fd, &next_key, &next_key) == 0 && next_key == 1); - assert(bpf_map_get_next_key(fd, &next_key, &next_key) == -1 && + assert(bpf_map_get_next_key(fd, &next_key, &next_key) < 0 && errno == ENOENT); /* Delete shouldn't succeed. */ key = 1; - assert(bpf_map_delete_elem(fd, &key) == -1 && errno == EINVAL); + assert(bpf_map_delete_elem(fd, &key) < 0 && errno == EINVAL); close(fd); } @@ -418,7 +435,7 @@ static void test_arraymap_percpu(unsigned int task, void *data) assert(bpf_map_update_elem(fd, &key, values, BPF_ANY) == 0); bpf_percpu(values, 0) = 0; - assert(bpf_map_update_elem(fd, &key, values, BPF_NOEXIST) == -1 && + assert(bpf_map_update_elem(fd, &key, values, BPF_NOEXIST) < 0 && errno == EEXIST); /* Check that key=1 can be found. */ @@ -433,11 +450,11 @@ static void test_arraymap_percpu(unsigned int task, void *data) /* Check that key=2 cannot be inserted due to max_entries limit. */ key = 2; - assert(bpf_map_update_elem(fd, &key, values, BPF_EXIST) == -1 && + assert(bpf_map_update_elem(fd, &key, values, BPF_EXIST) < 0 && errno == E2BIG); /* Check that key = 2 doesn't exist. */ - assert(bpf_map_lookup_elem(fd, &key, values) == -1 && errno == ENOENT); + assert(bpf_map_lookup_elem(fd, &key, values) < 0 && errno == ENOENT); /* Iterate over two elements. */ assert(bpf_map_get_next_key(fd, NULL, &next_key) == 0 && @@ -446,12 +463,12 @@ static void test_arraymap_percpu(unsigned int task, void *data) next_key == 0); assert(bpf_map_get_next_key(fd, &next_key, &next_key) == 0 && next_key == 1); - assert(bpf_map_get_next_key(fd, &next_key, &next_key) == -1 && + assert(bpf_map_get_next_key(fd, &next_key, &next_key) < 0 && errno == ENOENT); /* Delete shouldn't succeed. */ key = 1; - assert(bpf_map_delete_elem(fd, &key) == -1 && errno == EINVAL); + assert(bpf_map_delete_elem(fd, &key) < 0 && errno == EINVAL); close(fd); } @@ -555,7 +572,7 @@ static void test_queuemap(unsigned int task, void *data) assert(bpf_map_update_elem(fd, NULL, &vals[i], 0) == 0); /* Check that element cannot be pushed due to max_entries limit */ - assert(bpf_map_update_elem(fd, NULL, &val, 0) == -1 && + assert(bpf_map_update_elem(fd, NULL, &val, 0) < 0 && errno == E2BIG); /* Peek element */ @@ -571,12 +588,12 @@ static void test_queuemap(unsigned int task, void *data) val == vals[i]); /* Check that there are not elements left */ - assert(bpf_map_lookup_and_delete_elem(fd, NULL, &val) == -1 && + assert(bpf_map_lookup_and_delete_elem(fd, NULL, &val) < 0 && errno == ENOENT); /* Check that non supported functions set errno to EINVAL */ - assert(bpf_map_delete_elem(fd, NULL) == -1 && errno == EINVAL); - assert(bpf_map_get_next_key(fd, NULL, NULL) == -1 && errno == EINVAL); + assert(bpf_map_delete_elem(fd, NULL) < 0 && errno == EINVAL); + assert(bpf_map_get_next_key(fd, NULL, NULL) < 0 && errno == EINVAL); close(fd); } @@ -613,7 +630,7 @@ static void test_stackmap(unsigned int task, void *data) assert(bpf_map_update_elem(fd, NULL, &vals[i], 0) == 0); /* Check that element cannot be pushed due to max_entries limit */ - assert(bpf_map_update_elem(fd, NULL, &val, 0) == -1 && + assert(bpf_map_update_elem(fd, NULL, &val, 0) < 0 && errno == E2BIG); /* Peek element */ @@ -629,12 +646,12 @@ static void test_stackmap(unsigned int task, void *data) val == vals[i]); /* Check that there are not elements left */ - assert(bpf_map_lookup_and_delete_elem(fd, NULL, &val) == -1 && + assert(bpf_map_lookup_and_delete_elem(fd, NULL, &val) < 0 && errno == ENOENT); /* Check that non supported functions set errno to EINVAL */ - assert(bpf_map_delete_elem(fd, NULL) == -1 && errno == EINVAL); - assert(bpf_map_get_next_key(fd, NULL, NULL) == -1 && errno == EINVAL); + assert(bpf_map_delete_elem(fd, NULL) < 0 && errno == EINVAL); + assert(bpf_map_get_next_key(fd, NULL, NULL) < 0 && errno == EINVAL); close(fd); } @@ -835,7 +852,7 @@ static void test_sockmap(unsigned int tasks, void *data) } bpf_map_rx = bpf_object__find_map_by_name(obj, "sock_map_rx"); - if (IS_ERR(bpf_map_rx)) { + if (!bpf_map_rx) { printf("Failed to load map rx from verdict prog\n"); goto out_sockmap; } @@ -847,7 +864,7 @@ static void test_sockmap(unsigned int tasks, void *data) } bpf_map_tx = bpf_object__find_map_by_name(obj, "sock_map_tx"); - if (IS_ERR(bpf_map_tx)) { + if (!bpf_map_tx) { printf("Failed to load map tx from verdict prog\n"); goto out_sockmap; } @@ -859,7 +876,7 @@ static void test_sockmap(unsigned int tasks, void *data) } bpf_map_msg = bpf_object__find_map_by_name(obj, "sock_map_msg"); - if (IS_ERR(bpf_map_msg)) { + if (!bpf_map_msg) { printf("Failed to load map msg from msg_verdict prog\n"); goto out_sockmap; } @@ -871,7 +888,7 @@ static void test_sockmap(unsigned int tasks, void *data) } bpf_map_break = bpf_object__find_map_by_name(obj, "sock_map_break"); - if (IS_ERR(bpf_map_break)) { + if (!bpf_map_break) { printf("Failed to load map tx from verdict prog\n"); goto out_sockmap; } @@ -1153,7 +1170,7 @@ static void test_map_in_map(void) } map = bpf_object__find_map_by_name(obj, "mim_array"); - if (IS_ERR(map)) { + if (!map) { printf("Failed to load array of maps from test prog\n"); goto out_map_in_map; } @@ -1164,7 +1181,7 @@ static void test_map_in_map(void) } map = bpf_object__find_map_by_name(obj, "mim_hash"); - if (IS_ERR(map)) { + if (!map) { printf("Failed to load hash of maps from test prog\n"); goto out_map_in_map; } @@ -1177,7 +1194,7 @@ static void test_map_in_map(void) bpf_object__load(obj); map = bpf_object__find_map_by_name(obj, "mim_array"); - if (IS_ERR(map)) { + if (!map) { printf("Failed to load array of maps from test prog\n"); goto out_map_in_map; } @@ -1194,7 +1211,7 @@ static void test_map_in_map(void) } map = bpf_object__find_map_by_name(obj, "mim_hash"); - if (IS_ERR(map)) { + if (!map) { printf("Failed to load hash of maps from test prog\n"); goto out_map_in_map; } @@ -1246,7 +1263,7 @@ static void test_map_large(void) } key.c = -1; - assert(bpf_map_update_elem(fd, &key, &value, BPF_NOEXIST) == -1 && + assert(bpf_map_update_elem(fd, &key, &value, BPF_NOEXIST) < 0 && errno == E2BIG); /* Iterate through all elements. */ @@ -1254,12 +1271,12 @@ static void test_map_large(void) key.c = -1; for (i = 0; i < MAP_SIZE; i++) assert(bpf_map_get_next_key(fd, &key, &key) == 0); - assert(bpf_map_get_next_key(fd, &key, &key) == -1 && errno == ENOENT); + assert(bpf_map_get_next_key(fd, &key, &key) < 0 && errno == ENOENT); key.c = 0; assert(bpf_map_lookup_elem(fd, &key, &value) == 0 && value == 0); key.a = 1; - assert(bpf_map_lookup_elem(fd, &key, &value) == -1 && errno == ENOENT); + assert(bpf_map_lookup_elem(fd, &key, &value) < 0 && errno == ENOENT); close(fd); } @@ -1391,7 +1408,7 @@ static void test_map_parallel(void) run_parallel(TASKS, test_update_delete, data); /* Check that key=0 is already there. */ - assert(bpf_map_update_elem(fd, &key, &value, BPF_NOEXIST) == -1 && + assert(bpf_map_update_elem(fd, &key, &value, BPF_NOEXIST) < 0 && errno == EEXIST); /* Check that all elements were inserted. */ @@ -1399,7 +1416,7 @@ static void test_map_parallel(void) key = -1; for (i = 0; i < MAP_SIZE; i++) assert(bpf_map_get_next_key(fd, &key, &key) == 0); - assert(bpf_map_get_next_key(fd, &key, &key) == -1 && errno == ENOENT); + assert(bpf_map_get_next_key(fd, &key, &key) < 0 && errno == ENOENT); /* Another check for all elements */ for (i = 0; i < MAP_SIZE; i++) { @@ -1415,8 +1432,8 @@ static void test_map_parallel(void) /* Nothing should be left. */ key = -1; - assert(bpf_map_get_next_key(fd, NULL, &key) == -1 && errno == ENOENT); - assert(bpf_map_get_next_key(fd, &key, &key) == -1 && errno == ENOENT); + assert(bpf_map_get_next_key(fd, NULL, &key) < 0 && errno == ENOENT); + assert(bpf_map_get_next_key(fd, &key, &key) < 0 && errno == ENOENT); } static void test_map_rdonly(void) @@ -1434,12 +1451,12 @@ static void test_map_rdonly(void) key = 1; value = 1234; /* Try to insert key=1 element. */ - assert(bpf_map_update_elem(fd, &key, &value, BPF_ANY) == -1 && + assert(bpf_map_update_elem(fd, &key, &value, BPF_ANY) < 0 && errno == EPERM); /* Check that key=1 is not found. */ - assert(bpf_map_lookup_elem(fd, &key, &value) == -1 && errno == ENOENT); - assert(bpf_map_get_next_key(fd, &key, &value) == -1 && errno == ENOENT); + assert(bpf_map_lookup_elem(fd, &key, &value) < 0 && errno == ENOENT); + assert(bpf_map_get_next_key(fd, &key, &value) < 0 && errno == ENOENT); close(fd); } @@ -1462,8 +1479,8 @@ static void test_map_wronly_hash(void) assert(bpf_map_update_elem(fd, &key, &value, BPF_ANY) == 0); /* Check that reading elements and keys from the map is not allowed. */ - assert(bpf_map_lookup_elem(fd, &key, &value) == -1 && errno == EPERM); - assert(bpf_map_get_next_key(fd, &key, &value) == -1 && errno == EPERM); + assert(bpf_map_lookup_elem(fd, &key, &value) < 0 && errno == EPERM); + assert(bpf_map_get_next_key(fd, &key, &value) < 0 && errno == EPERM); close(fd); } @@ -1490,10 +1507,10 @@ static void test_map_wronly_stack_or_queue(enum bpf_map_type map_type) assert(bpf_map_update_elem(fd, NULL, &value, BPF_ANY) == 0); /* Peek element should fail */ - assert(bpf_map_lookup_elem(fd, NULL, &value) == -1 && errno == EPERM); + assert(bpf_map_lookup_elem(fd, NULL, &value) < 0 && errno == EPERM); /* Pop element should fail */ - assert(bpf_map_lookup_and_delete_elem(fd, NULL, &value) == -1 && + assert(bpf_map_lookup_and_delete_elem(fd, NULL, &value) < 0 && errno == EPERM); close(fd); @@ -1547,7 +1564,7 @@ static void prepare_reuseport_grp(int type, int map_fd, size_t map_elem_size, value = &fd32; } err = bpf_map_update_elem(map_fd, &index0, value, BPF_ANY); - CHECK(err != -1 || errno != EINVAL, + CHECK(err >= 0 || errno != EINVAL, "reuseport array update unbound sk", "sock_type:%d err:%d errno:%d\n", type, err, errno); @@ -1576,7 +1593,7 @@ static void prepare_reuseport_grp(int type, int map_fd, size_t map_elem_size, */ err = bpf_map_update_elem(map_fd, &index0, value, BPF_ANY); - CHECK(err != -1 || errno != EINVAL, + CHECK(err >= 0 || errno != EINVAL, "reuseport array update non-listening sk", "sock_type:%d err:%d errno:%d\n", type, err, errno); @@ -1606,31 +1623,31 @@ static void test_reuseport_array(void) map_fd = bpf_create_map(BPF_MAP_TYPE_REUSEPORT_SOCKARRAY, sizeof(__u32), sizeof(__u64), array_size, 0); - CHECK(map_fd == -1, "reuseport array create", + CHECK(map_fd < 0, "reuseport array create", "map_fd:%d, errno:%d\n", map_fd, errno); /* Test lookup/update/delete with invalid index */ err = bpf_map_delete_elem(map_fd, &bad_index); - CHECK(err != -1 || errno != E2BIG, "reuseport array del >=max_entries", + CHECK(err >= 0 || errno != E2BIG, "reuseport array del >=max_entries", "err:%d errno:%d\n", err, errno); err = bpf_map_update_elem(map_fd, &bad_index, &fd64, BPF_ANY); - CHECK(err != -1 || errno != E2BIG, + CHECK(err >= 0 || errno != E2BIG, "reuseport array update >=max_entries", "err:%d errno:%d\n", err, errno); err = bpf_map_lookup_elem(map_fd, &bad_index, &map_cookie); - CHECK(err != -1 || errno != ENOENT, + CHECK(err >= 0 || errno != ENOENT, "reuseport array update >=max_entries", "err:%d errno:%d\n", err, errno); /* Test lookup/delete non existence elem */ err = bpf_map_lookup_elem(map_fd, &index3, &map_cookie); - CHECK(err != -1 || errno != ENOENT, + CHECK(err >= 0 || errno != ENOENT, "reuseport array lookup not-exist elem", "err:%d errno:%d\n", err, errno); err = bpf_map_delete_elem(map_fd, &index3); - CHECK(err != -1 || errno != ENOENT, + CHECK(err >= 0 || errno != ENOENT, "reuseport array del not-exist elem", "err:%d errno:%d\n", err, errno); @@ -1644,7 +1661,7 @@ static void test_reuseport_array(void) /* BPF_EXIST failure case */ err = bpf_map_update_elem(map_fd, &index3, &grpa_fds64[fds_idx], BPF_EXIST); - CHECK(err != -1 || errno != ENOENT, + CHECK(err >= 0 || errno != ENOENT, "reuseport array update empty elem BPF_EXIST", "sock_type:%d err:%d errno:%d\n", type, err, errno); @@ -1653,7 +1670,7 @@ static void test_reuseport_array(void) /* BPF_NOEXIST success case */ err = bpf_map_update_elem(map_fd, &index3, &grpa_fds64[fds_idx], BPF_NOEXIST); - CHECK(err == -1, + CHECK(err < 0, "reuseport array update empty elem BPF_NOEXIST", "sock_type:%d err:%d errno:%d\n", type, err, errno); @@ -1662,7 +1679,7 @@ static void test_reuseport_array(void) /* BPF_EXIST success case. */ err = bpf_map_update_elem(map_fd, &index3, &grpa_fds64[fds_idx], BPF_EXIST); - CHECK(err == -1, + CHECK(err < 0, "reuseport array update same elem BPF_EXIST", "sock_type:%d err:%d errno:%d\n", type, err, errno); fds_idx = REUSEPORT_FD_IDX(err, fds_idx); @@ -1670,7 +1687,7 @@ static void test_reuseport_array(void) /* BPF_NOEXIST failure case */ err = bpf_map_update_elem(map_fd, &index3, &grpa_fds64[fds_idx], BPF_NOEXIST); - CHECK(err != -1 || errno != EEXIST, + CHECK(err >= 0 || errno != EEXIST, "reuseport array update non-empty elem BPF_NOEXIST", "sock_type:%d err:%d errno:%d\n", type, err, errno); @@ -1679,7 +1696,7 @@ static void test_reuseport_array(void) /* BPF_ANY case (always succeed) */ err = bpf_map_update_elem(map_fd, &index3, &grpa_fds64[fds_idx], BPF_ANY); - CHECK(err == -1, + CHECK(err < 0, "reuseport array update same sk with BPF_ANY", "sock_type:%d err:%d errno:%d\n", type, err, errno); @@ -1688,32 +1705,32 @@ static void test_reuseport_array(void) /* The same sk cannot be added to reuseport_array twice */ err = bpf_map_update_elem(map_fd, &index3, &fd64, BPF_ANY); - CHECK(err != -1 || errno != EBUSY, + CHECK(err >= 0 || errno != EBUSY, "reuseport array update same sk with same index", "sock_type:%d err:%d errno:%d\n", type, err, errno); err = bpf_map_update_elem(map_fd, &index0, &fd64, BPF_ANY); - CHECK(err != -1 || errno != EBUSY, + CHECK(err >= 0 || errno != EBUSY, "reuseport array update same sk with different index", "sock_type:%d err:%d errno:%d\n", type, err, errno); /* Test delete elem */ err = bpf_map_delete_elem(map_fd, &index3); - CHECK(err == -1, "reuseport array delete sk", + CHECK(err < 0, "reuseport array delete sk", "sock_type:%d err:%d errno:%d\n", type, err, errno); /* Add it back with BPF_NOEXIST */ err = bpf_map_update_elem(map_fd, &index3, &fd64, BPF_NOEXIST); - CHECK(err == -1, + CHECK(err < 0, "reuseport array re-add with BPF_NOEXIST after del", "sock_type:%d err:%d errno:%d\n", type, err, errno); /* Test cookie */ err = bpf_map_lookup_elem(map_fd, &index3, &map_cookie); - CHECK(err == -1 || sk_cookie != map_cookie, + CHECK(err < 0 || sk_cookie != map_cookie, "reuseport array lookup re-added sk", "sock_type:%d err:%d errno:%d sk_cookie:0x%llx map_cookie:0x%llxn", type, err, errno, sk_cookie, map_cookie); @@ -1722,7 +1739,7 @@ static void test_reuseport_array(void) for (f = 0; f < ARRAY_SIZE(grpa_fds64); f++) close(grpa_fds64[f]); err = bpf_map_lookup_elem(map_fd, &index3, &map_cookie); - CHECK(err != -1 || errno != ENOENT, + CHECK(err >= 0 || errno != ENOENT, "reuseport array lookup after close()", "sock_type:%d err:%d errno:%d\n", type, err, errno); @@ -1733,7 +1750,7 @@ static void test_reuseport_array(void) CHECK(fd64 == -1, "socket(SOCK_RAW)", "err:%d errno:%d\n", err, errno); err = bpf_map_update_elem(map_fd, &index3, &fd64, BPF_NOEXIST); - CHECK(err != -1 || errno != ENOTSUPP, "reuseport array update SOCK_RAW", + CHECK(err >= 0 || errno != ENOTSUPP, "reuseport array update SOCK_RAW", "err:%d errno:%d\n", err, errno); close(fd64); @@ -1743,16 +1760,16 @@ static void test_reuseport_array(void) /* Test 32 bit fd */ map_fd = bpf_create_map(BPF_MAP_TYPE_REUSEPORT_SOCKARRAY, sizeof(__u32), sizeof(__u32), array_size, 0); - CHECK(map_fd == -1, "reuseport array create", + CHECK(map_fd < 0, "reuseport array create", "map_fd:%d, errno:%d\n", map_fd, errno); prepare_reuseport_grp(SOCK_STREAM, map_fd, sizeof(__u32), &fd64, &sk_cookie, 1); fd = fd64; err = bpf_map_update_elem(map_fd, &index3, &fd, BPF_NOEXIST); - CHECK(err == -1, "reuseport array update 32 bit fd", + CHECK(err < 0, "reuseport array update 32 bit fd", "err:%d errno:%d\n", err, errno); err = bpf_map_lookup_elem(map_fd, &index3, &map_cookie); - CHECK(err != -1 || errno != ENOSPC, + CHECK(err >= 0 || errno != ENOSPC, "reuseport array lookup 32 bit fd", "err:%d errno:%d\n", err, errno); close(fd); @@ -1798,6 +1815,8 @@ int main(void) { srand(time(NULL)); + libbpf_set_strict_mode(LIBBPF_STRICT_ALL); + map_flags = 0; run_all_tests(); diff --git a/tools/testing/selftests/bpf/test_progs.c b/tools/testing/selftests/bpf/test_progs.c index 6396932b97e2..6f103106a39b 100644 --- a/tools/testing/selftests/bpf/test_progs.c +++ b/tools/testing/selftests/bpf/test_progs.c @@ -737,6 +737,9 @@ int main(int argc, char **argv) if (err) return err; + /* Use libbpf 1.0 API mode */ + libbpf_set_strict_mode(LIBBPF_STRICT_ALL); + libbpf_set_print(libbpf_print_fn); srand(time(NULL)); diff --git a/tools/testing/selftests/bpf/test_progs.h b/tools/testing/selftests/bpf/test_progs.h index dda52cb649dc..8ef7f334e715 100644 --- a/tools/testing/selftests/bpf/test_progs.h +++ b/tools/testing/selftests/bpf/test_progs.h @@ -249,16 +249,17 @@ extern int test__join_cgroup(const char *path); #define ASSERT_OK_PTR(ptr, name) ({ \ static int duration = 0; \ const void *___res = (ptr); \ - bool ___ok = !IS_ERR_OR_NULL(___res); \ - CHECK(!___ok, (name), \ - "unexpected error: %ld\n", PTR_ERR(___res)); \ + int ___err = libbpf_get_error(___res); \ + bool ___ok = ___err == 0; \ + CHECK(!___ok, (name), "unexpected error: %d\n", ___err); \ ___ok; \ }) #define ASSERT_ERR_PTR(ptr, name) ({ \ static int duration = 0; \ const void *___res = (ptr); \ - bool ___ok = IS_ERR(___res); \ + int ___err = libbpf_get_error(___res); \ + bool ___ok = ___err != 0; \ CHECK(!___ok, (name), "unexpected pointer: %p\n", ___res); \ ___ok; \ }) diff --git a/tools/testing/selftests/bpf/test_tc_redirect.sh b/tools/testing/selftests/bpf/test_tc_redirect.sh deleted file mode 100755 index 8868aa1ca902..000000000000 --- a/tools/testing/selftests/bpf/test_tc_redirect.sh +++ /dev/null @@ -1,216 +0,0 @@ -#!/bin/bash -# SPDX-License-Identifier: GPL-2.0 -# -# This test sets up 3 netns (src <-> fwd <-> dst). There is no direct veth link -# between src and dst. The netns fwd has veth links to each src and dst. The -# client is in src and server in dst. The test installs a TC BPF program to each -# host facing veth in fwd which calls into i) bpf_redirect_neigh() to perform the -# neigh addr population and redirect or ii) bpf_redirect_peer() for namespace -# switch from ingress side; it also installs a checker prog on the egress side -# to drop unexpected traffic. - -if [[ $EUID -ne 0 ]]; then - echo "This script must be run as root" - echo "FAIL" - exit 1 -fi - -# check that needed tools are present -command -v nc >/dev/null 2>&1 || \ - { echo >&2 "nc is not available"; exit 1; } -command -v dd >/dev/null 2>&1 || \ - { echo >&2 "dd is not available"; exit 1; } -command -v timeout >/dev/null 2>&1 || \ - { echo >&2 "timeout is not available"; exit 1; } -command -v ping >/dev/null 2>&1 || \ - { echo >&2 "ping is not available"; exit 1; } -if command -v ping6 >/dev/null 2>&1; then PING6=ping6; else PING6=ping; fi -command -v perl >/dev/null 2>&1 || \ - { echo >&2 "perl is not available"; exit 1; } -command -v jq >/dev/null 2>&1 || \ - { echo >&2 "jq is not available"; exit 1; } -command -v bpftool >/dev/null 2>&1 || \ - { echo >&2 "bpftool is not available"; exit 1; } - -readonly GREEN='\033[0;92m' -readonly RED='\033[0;31m' -readonly NC='\033[0m' # No Color - -readonly PING_ARG="-c 3 -w 10 -q" - -readonly TIMEOUT=10 - -readonly NS_SRC="ns-src-$(mktemp -u XXXXXX)" -readonly NS_FWD="ns-fwd-$(mktemp -u XXXXXX)" -readonly NS_DST="ns-dst-$(mktemp -u XXXXXX)" - -readonly IP4_SRC="172.16.1.100" -readonly IP4_DST="172.16.2.100" - -readonly IP6_SRC="::1:dead:beef:cafe" -readonly IP6_DST="::2:dead:beef:cafe" - -readonly IP4_SLL="169.254.0.1" -readonly IP4_DLL="169.254.0.2" -readonly IP4_NET="169.254.0.0" - -netns_cleanup() -{ - ip netns del ${NS_SRC} - ip netns del ${NS_FWD} - ip netns del ${NS_DST} -} - -netns_setup() -{ - ip netns add "${NS_SRC}" - ip netns add "${NS_FWD}" - ip netns add "${NS_DST}" - - ip link add veth_src type veth peer name veth_src_fwd - ip link add veth_dst type veth peer name veth_dst_fwd - - ip link set veth_src netns ${NS_SRC} - ip link set veth_src_fwd netns ${NS_FWD} - - ip link set veth_dst netns ${NS_DST} - ip link set veth_dst_fwd netns ${NS_FWD} - - ip -netns ${NS_SRC} addr add ${IP4_SRC}/32 dev veth_src - ip -netns ${NS_DST} addr add ${IP4_DST}/32 dev veth_dst - - # The fwd netns automatically get a v6 LL address / routes, but also - # needs v4 one in order to start ARP probing. IP4_NET route is added - # to the endpoints so that the ARP processing will reply. - - ip -netns ${NS_FWD} addr add ${IP4_SLL}/32 dev veth_src_fwd - ip -netns ${NS_FWD} addr add ${IP4_DLL}/32 dev veth_dst_fwd - - ip -netns ${NS_SRC} addr add ${IP6_SRC}/128 dev veth_src nodad - ip -netns ${NS_DST} addr add ${IP6_DST}/128 dev veth_dst nodad - - ip -netns ${NS_SRC} link set dev veth_src up - ip -netns ${NS_FWD} link set dev veth_src_fwd up - - ip -netns ${NS_DST} link set dev veth_dst up - ip -netns ${NS_FWD} link set dev veth_dst_fwd up - - ip -netns ${NS_SRC} route add ${IP4_DST}/32 dev veth_src scope global - ip -netns ${NS_SRC} route add ${IP4_NET}/16 dev veth_src scope global - ip -netns ${NS_FWD} route add ${IP4_SRC}/32 dev veth_src_fwd scope global - - ip -netns ${NS_SRC} route add ${IP6_DST}/128 dev veth_src scope global - ip -netns ${NS_FWD} route add ${IP6_SRC}/128 dev veth_src_fwd scope global - - ip -netns ${NS_DST} route add ${IP4_SRC}/32 dev veth_dst scope global - ip -netns ${NS_DST} route add ${IP4_NET}/16 dev veth_dst scope global - ip -netns ${NS_FWD} route add ${IP4_DST}/32 dev veth_dst_fwd scope global - - ip -netns ${NS_DST} route add ${IP6_SRC}/128 dev veth_dst scope global - ip -netns ${NS_FWD} route add ${IP6_DST}/128 dev veth_dst_fwd scope global - - fmac_src=$(ip netns exec ${NS_FWD} cat /sys/class/net/veth_src_fwd/address) - fmac_dst=$(ip netns exec ${NS_FWD} cat /sys/class/net/veth_dst_fwd/address) - - ip -netns ${NS_SRC} neigh add ${IP4_DST} dev veth_src lladdr $fmac_src - ip -netns ${NS_DST} neigh add ${IP4_SRC} dev veth_dst lladdr $fmac_dst - - ip -netns ${NS_SRC} neigh add ${IP6_DST} dev veth_src lladdr $fmac_src - ip -netns ${NS_DST} neigh add ${IP6_SRC} dev veth_dst lladdr $fmac_dst -} - -netns_test_connectivity() -{ - set +e - - ip netns exec ${NS_DST} bash -c "nc -4 -l -p 9004 &" - ip netns exec ${NS_DST} bash -c "nc -6 -l -p 9006 &" - - TEST="TCPv4 connectivity test" - ip netns exec ${NS_SRC} bash -c "timeout ${TIMEOUT} dd if=/dev/zero bs=1000 count=100 > /dev/tcp/${IP4_DST}/9004" - if [ $? -ne 0 ]; then - echo -e "${TEST}: ${RED}FAIL${NC}" - exit 1 - fi - echo -e "${TEST}: ${GREEN}PASS${NC}" - - TEST="TCPv6 connectivity test" - ip netns exec ${NS_SRC} bash -c "timeout ${TIMEOUT} dd if=/dev/zero bs=1000 count=100 > /dev/tcp/${IP6_DST}/9006" - if [ $? -ne 0 ]; then - echo -e "${TEST}: ${RED}FAIL${NC}" - exit 1 - fi - echo -e "${TEST}: ${GREEN}PASS${NC}" - - TEST="ICMPv4 connectivity test" - ip netns exec ${NS_SRC} ping $PING_ARG ${IP4_DST} - if [ $? -ne 0 ]; then - echo -e "${TEST}: ${RED}FAIL${NC}" - exit 1 - fi - echo -e "${TEST}: ${GREEN}PASS${NC}" - - TEST="ICMPv6 connectivity test" - ip netns exec ${NS_SRC} $PING6 $PING_ARG ${IP6_DST} - if [ $? -ne 0 ]; then - echo -e "${TEST}: ${RED}FAIL${NC}" - exit 1 - fi - echo -e "${TEST}: ${GREEN}PASS${NC}" - - set -e -} - -hex_mem_str() -{ - perl -e 'print join(" ", unpack("(H2)8", pack("L", @ARGV)))' $1 -} - -netns_setup_bpf() -{ - local obj=$1 - local use_forwarding=${2:-0} - - ip netns exec ${NS_FWD} tc qdisc add dev veth_src_fwd clsact - ip netns exec ${NS_FWD} tc filter add dev veth_src_fwd ingress bpf da obj $obj sec src_ingress - ip netns exec ${NS_FWD} tc filter add dev veth_src_fwd egress bpf da obj $obj sec chk_egress - - ip netns exec ${NS_FWD} tc qdisc add dev veth_dst_fwd clsact - ip netns exec ${NS_FWD} tc filter add dev veth_dst_fwd ingress bpf da obj $obj sec dst_ingress - ip netns exec ${NS_FWD} tc filter add dev veth_dst_fwd egress bpf da obj $obj sec chk_egress - - if [ "$use_forwarding" -eq "1" ]; then - # bpf_fib_lookup() checks if forwarding is enabled - ip netns exec ${NS_FWD} sysctl -w net.ipv4.ip_forward=1 - ip netns exec ${NS_FWD} sysctl -w net.ipv6.conf.veth_dst_fwd.forwarding=1 - ip netns exec ${NS_FWD} sysctl -w net.ipv6.conf.veth_src_fwd.forwarding=1 - return 0 - fi - - veth_src=$(ip netns exec ${NS_FWD} cat /sys/class/net/veth_src_fwd/ifindex) - veth_dst=$(ip netns exec ${NS_FWD} cat /sys/class/net/veth_dst_fwd/ifindex) - - progs=$(ip netns exec ${NS_FWD} bpftool net --json | jq -r '.[] | .tc | map(.id) | .[]') - for prog in $progs; do - map=$(bpftool prog show id $prog --json | jq -r '.map_ids | .? | .[]') - if [ ! -z "$map" ]; then - bpftool map update id $map key hex $(hex_mem_str 0) value hex $(hex_mem_str $veth_src) - bpftool map update id $map key hex $(hex_mem_str 1) value hex $(hex_mem_str $veth_dst) - fi - done -} - -trap netns_cleanup EXIT -set -e - -netns_setup -netns_setup_bpf test_tc_neigh.o -netns_test_connectivity -netns_cleanup -netns_setup -netns_setup_bpf test_tc_neigh_fib.o 1 -netns_test_connectivity -netns_cleanup -netns_setup -netns_setup_bpf test_tc_peer.o -netns_test_connectivity diff --git a/tools/testing/selftests/bpf/test_tcpnotify_user.c b/tools/testing/selftests/bpf/test_tcpnotify_user.c index 73da7fe8c152..4a39304cc5a6 100644 --- a/tools/testing/selftests/bpf/test_tcpnotify_user.c +++ b/tools/testing/selftests/bpf/test_tcpnotify_user.c @@ -82,6 +82,8 @@ int main(int argc, char **argv) cpu_set_t cpuset; __u32 key = 0; + libbpf_set_strict_mode(LIBBPF_STRICT_ALL); + CPU_ZERO(&cpuset); CPU_SET(0, &cpuset); pthread_setaffinity_np(pthread_self(), sizeof(cpu_set_t), &cpuset); @@ -116,7 +118,7 @@ int main(int argc, char **argv) pb_opts.sample_cb = dummyfn; pb = perf_buffer__new(bpf_map__fd(perf_map), 8, &pb_opts); - if (IS_ERR(pb)) + if (!pb) goto err; pthread_create(&tid, NULL, poller_thread, pb); @@ -163,7 +165,6 @@ err: bpf_prog_detach(cg_fd, BPF_CGROUP_SOCK_OPS); close(cg_fd); cleanup_cgroup_environment(); - if (!IS_ERR_OR_NULL(pb)) - perf_buffer__free(pb); + perf_buffer__free(pb); return error; } diff --git a/tools/testing/selftests/bpf/test_verifier.c b/tools/testing/selftests/bpf/test_verifier.c index 1512092e1e68..3a9e332c5e36 100644 --- a/tools/testing/selftests/bpf/test_verifier.c +++ b/tools/testing/selftests/bpf/test_verifier.c @@ -1147,7 +1147,7 @@ static void do_test_single(struct bpf_test *test, bool unpriv, } } - if (test->insn_processed) { + if (!unpriv && test->insn_processed) { uint32_t insn_processed; char *proc; diff --git a/tools/testing/selftests/bpf/test_xdp_redirect_multi.sh b/tools/testing/selftests/bpf/test_xdp_redirect_multi.sh new file mode 100755 index 000000000000..1538373157e3 --- /dev/null +++ b/tools/testing/selftests/bpf/test_xdp_redirect_multi.sh @@ -0,0 +1,204 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 +# +# Test topology: +# - - - - - - - - - - - - - - - - - - - - - - - - - +# | veth1 veth2 veth3 | ... init net +# - -| - - - - - - | - - - - - - | - - +# --------- --------- --------- +# | veth0 | | veth0 | | veth0 | ... +# --------- --------- --------- +# ns1 ns2 ns3 +# +# Test modules: +# XDP modes: generic, native, native + egress_prog +# +# Test cases: +# ARP: Testing BPF_F_BROADCAST, the ingress interface also should receive +# the redirects. +# ns1 -> gw: ns1, ns2, ns3, should receive the arp request +# IPv4: Testing BPF_F_BROADCAST | BPF_F_EXCLUDE_INGRESS, the ingress +# interface should not receive the redirects. +# ns1 -> gw: ns1 should not receive, ns2, ns3 should receive redirects. +# IPv6: Testing none flag, all the pkts should be redirected back +# ping test: ns1 -> ns2 (block), echo requests will be redirect back +# egress_prog: +# all src mac should be egress interface's mac + +# netns numbers +NUM=3 +IFACES="" +DRV_MODE="xdpgeneric xdpdrv xdpegress" +PASS=0 +FAIL=0 + +test_pass() +{ + echo "Pass: $@" + PASS=$((PASS + 1)) +} + +test_fail() +{ + echo "fail: $@" + FAIL=$((FAIL + 1)) +} + +clean_up() +{ + for i in $(seq $NUM); do + ip link del veth$i 2> /dev/null + ip netns del ns$i 2> /dev/null + done +} + +# Kselftest framework requirement - SKIP code is 4. +check_env() +{ + ip link set dev lo xdpgeneric off &>/dev/null + if [ $? -ne 0 ];then + echo "selftests: [SKIP] Could not run test without the ip xdpgeneric support" + exit 4 + fi + + which tcpdump &>/dev/null + if [ $? -ne 0 ];then + echo "selftests: [SKIP] Could not run test without tcpdump" + exit 4 + fi +} + +setup_ns() +{ + local mode=$1 + IFACES="" + + if [ "$mode" = "xdpegress" ]; then + mode="xdpdrv" + fi + + for i in $(seq $NUM); do + ip netns add ns$i + ip link add veth$i type veth peer name veth0 netns ns$i + ip link set veth$i up + ip -n ns$i link set veth0 up + + ip -n ns$i addr add 192.0.2.$i/24 dev veth0 + ip -n ns$i addr add 2001:db8::$i/64 dev veth0 + # Add a neigh entry for IPv4 ping test + ip -n ns$i neigh add 192.0.2.253 lladdr 00:00:00:00:00:01 dev veth0 + ip -n ns$i link set veth0 $mode obj \ + xdp_dummy.o sec xdp_dummy &> /dev/null || \ + { test_fail "Unable to load dummy xdp" && exit 1; } + IFACES="$IFACES veth$i" + veth_mac[$i]=$(ip link show veth$i | awk '/link\/ether/ {print $2}') + done +} + +do_egress_tests() +{ + local mode=$1 + + # mac test + ip netns exec ns2 tcpdump -e -i veth0 -nn -l -e &> mac_ns1-2_${mode}.log & + ip netns exec ns3 tcpdump -e -i veth0 -nn -l -e &> mac_ns1-3_${mode}.log & + sleep 0.5 + ip netns exec ns1 ping 192.0.2.254 -i 0.1 -c 4 &> /dev/null + sleep 0.5 + pkill -9 tcpdump + + # mac check + grep -q "${veth_mac[2]} > ff:ff:ff:ff:ff:ff" mac_ns1-2_${mode}.log && \ + test_pass "$mode mac ns1-2" || test_fail "$mode mac ns1-2" + grep -q "${veth_mac[3]} > ff:ff:ff:ff:ff:ff" mac_ns1-3_${mode}.log && \ + test_pass "$mode mac ns1-3" || test_fail "$mode mac ns1-3" +} + +do_ping_tests() +{ + local mode=$1 + + # ping6 test: echo request should be redirect back to itself, not others + ip netns exec ns1 ip neigh add 2001:db8::2 dev veth0 lladdr 00:00:00:00:00:02 + + ip netns exec ns1 tcpdump -i veth0 -nn -l -e &> ns1-1_${mode}.log & + ip netns exec ns2 tcpdump -i veth0 -nn -l -e &> ns1-2_${mode}.log & + ip netns exec ns3 tcpdump -i veth0 -nn -l -e &> ns1-3_${mode}.log & + sleep 0.5 + # ARP test + ip netns exec ns1 ping 192.0.2.254 -i 0.1 -c 4 &> /dev/null + # IPv4 test + ip netns exec ns1 ping 192.0.2.253 -i 0.1 -c 4 &> /dev/null + # IPv6 test + ip netns exec ns1 ping6 2001:db8::2 -i 0.1 -c 2 &> /dev/null + sleep 0.5 + pkill -9 tcpdump + + # All netns should receive the redirect arp requests + [ $(grep -c "who-has 192.0.2.254" ns1-1_${mode}.log) -gt 4 ] && \ + test_pass "$mode arp(F_BROADCAST) ns1-1" || \ + test_fail "$mode arp(F_BROADCAST) ns1-1" + [ $(grep -c "who-has 192.0.2.254" ns1-2_${mode}.log) -le 4 ] && \ + test_pass "$mode arp(F_BROADCAST) ns1-2" || \ + test_fail "$mode arp(F_BROADCAST) ns1-2" + [ $(grep -c "who-has 192.0.2.254" ns1-3_${mode}.log) -le 4 ] && \ + test_pass "$mode arp(F_BROADCAST) ns1-3" || \ + test_fail "$mode arp(F_BROADCAST) ns1-3" + + # ns1 should not receive the redirect echo request, others should + [ $(grep -c "ICMP echo request" ns1-1_${mode}.log) -eq 4 ] && \ + test_pass "$mode IPv4 (F_BROADCAST|F_EXCLUDE_INGRESS) ns1-1" || \ + test_fail "$mode IPv4 (F_BROADCAST|F_EXCLUDE_INGRESS) ns1-1" + [ $(grep -c "ICMP echo request" ns1-2_${mode}.log) -eq 4 ] && \ + test_pass "$mode IPv4 (F_BROADCAST|F_EXCLUDE_INGRESS) ns1-2" || \ + test_fail "$mode IPv4 (F_BROADCAST|F_EXCLUDE_INGRESS) ns1-2" + [ $(grep -c "ICMP echo request" ns1-3_${mode}.log) -eq 4 ] && \ + test_pass "$mode IPv4 (F_BROADCAST|F_EXCLUDE_INGRESS) ns1-3" || \ + test_fail "$mode IPv4 (F_BROADCAST|F_EXCLUDE_INGRESS) ns1-3" + + # ns1 should receive the echo request, ns2 should not + [ $(grep -c "ICMP6, echo request" ns1-1_${mode}.log) -eq 4 ] && \ + test_pass "$mode IPv6 (no flags) ns1-1" || \ + test_fail "$mode IPv6 (no flags) ns1-1" + [ $(grep -c "ICMP6, echo request" ns1-2_${mode}.log) -eq 0 ] && \ + test_pass "$mode IPv6 (no flags) ns1-2" || \ + test_fail "$mode IPv6 (no flags) ns1-2" +} + +do_tests() +{ + local mode=$1 + local drv_p + + case ${mode} in + xdpdrv) drv_p="-N";; + xdpegress) drv_p="-X";; + xdpgeneric) drv_p="-S";; + esac + + ./xdp_redirect_multi $drv_p $IFACES &> xdp_redirect_${mode}.log & + xdp_pid=$! + sleep 1 + + if [ "$mode" = "xdpegress" ]; then + do_egress_tests $mode + else + do_ping_tests $mode + fi + + kill $xdp_pid +} + +trap clean_up 0 2 3 6 9 + +check_env +rm -f xdp_redirect_*.log ns*.log mac_ns*.log + +for mode in ${DRV_MODE}; do + setup_ns $mode + do_tests $mode + clean_up +done + +echo "Summary: PASS $PASS, FAIL $FAIL" +[ $FAIL -eq 0 ] && exit 0 || exit 1 diff --git a/tools/testing/selftests/bpf/verifier/and.c b/tools/testing/selftests/bpf/verifier/and.c index ca8fdb1b3f01..7d7ebee5cc7a 100644 --- a/tools/testing/selftests/bpf/verifier/and.c +++ b/tools/testing/selftests/bpf/verifier/and.c @@ -61,6 +61,8 @@ BPF_MOV64_IMM(BPF_REG_0, 0), BPF_EXIT_INSN(), }, + .errstr_unpriv = "R1 !read_ok", + .result_unpriv = REJECT, .result = ACCEPT, .retval = 0 }, diff --git a/tools/testing/selftests/bpf/verifier/bounds.c b/tools/testing/selftests/bpf/verifier/bounds.c index 8a1caf46ffbc..e061e8799ce2 100644 --- a/tools/testing/selftests/bpf/verifier/bounds.c +++ b/tools/testing/selftests/bpf/verifier/bounds.c @@ -508,6 +508,8 @@ BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_0, -1), BPF_EXIT_INSN(), }, + .errstr_unpriv = "R0 invalid mem access 'inv'", + .result_unpriv = REJECT, .result = ACCEPT }, { @@ -528,6 +530,8 @@ BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_0, -1), BPF_EXIT_INSN(), }, + .errstr_unpriv = "R0 invalid mem access 'inv'", + .result_unpriv = REJECT, .result = ACCEPT }, { @@ -569,6 +573,8 @@ BPF_MOV64_IMM(BPF_REG_0, 0), BPF_EXIT_INSN(), }, + .errstr_unpriv = "R0 min value is outside of the allowed memory range", + .result_unpriv = REJECT, .fixup_map_hash_8b = { 3 }, .result = ACCEPT, }, @@ -589,6 +595,8 @@ BPF_MOV64_IMM(BPF_REG_0, 0), BPF_EXIT_INSN(), }, + .errstr_unpriv = "R0 min value is outside of the allowed memory range", + .result_unpriv = REJECT, .fixup_map_hash_8b = { 3 }, .result = ACCEPT, }, @@ -609,6 +617,8 @@ BPF_MOV64_IMM(BPF_REG_0, 0), BPF_EXIT_INSN(), }, + .errstr_unpriv = "R0 min value is outside of the allowed memory range", + .result_unpriv = REJECT, .fixup_map_hash_8b = { 3 }, .result = ACCEPT, }, @@ -674,6 +684,8 @@ BPF_MOV64_IMM(BPF_REG_0, 0), BPF_EXIT_INSN(), }, + .errstr_unpriv = "R0 min value is outside of the allowed memory range", + .result_unpriv = REJECT, .fixup_map_hash_8b = { 3 }, .result = ACCEPT, }, @@ -695,6 +707,8 @@ BPF_MOV64_IMM(BPF_REG_0, 0), BPF_EXIT_INSN(), }, + .errstr_unpriv = "R0 min value is outside of the allowed memory range", + .result_unpriv = REJECT, .fixup_map_hash_8b = { 3 }, .result = ACCEPT, }, diff --git a/tools/testing/selftests/bpf/verifier/dead_code.c b/tools/testing/selftests/bpf/verifier/dead_code.c index 17fe33a75034..2c8935b3e65d 100644 --- a/tools/testing/selftests/bpf/verifier/dead_code.c +++ b/tools/testing/selftests/bpf/verifier/dead_code.c @@ -8,6 +8,8 @@ BPF_JMP_IMM(BPF_JGE, BPF_REG_0, 10, -4), BPF_EXIT_INSN(), }, + .errstr_unpriv = "R9 !read_ok", + .result_unpriv = REJECT, .result = ACCEPT, .retval = 7, }, diff --git a/tools/testing/selftests/bpf/verifier/jmp32.c b/tools/testing/selftests/bpf/verifier/jmp32.c index bd5cae4a7f73..1c857b2fbdf0 100644 --- a/tools/testing/selftests/bpf/verifier/jmp32.c +++ b/tools/testing/selftests/bpf/verifier/jmp32.c @@ -87,6 +87,8 @@ BPF_LDX_MEM(BPF_B, BPF_REG_8, BPF_REG_9, 0), BPF_EXIT_INSN(), }, + .errstr_unpriv = "R9 !read_ok", + .result_unpriv = REJECT, .result = ACCEPT, }, { @@ -150,6 +152,8 @@ BPF_LDX_MEM(BPF_B, BPF_REG_8, BPF_REG_9, 0), BPF_EXIT_INSN(), }, + .errstr_unpriv = "R9 !read_ok", + .result_unpriv = REJECT, .result = ACCEPT, }, { @@ -213,6 +217,8 @@ BPF_LDX_MEM(BPF_B, BPF_REG_8, BPF_REG_9, 0), BPF_EXIT_INSN(), }, + .errstr_unpriv = "R9 !read_ok", + .result_unpriv = REJECT, .result = ACCEPT, }, { @@ -280,6 +286,8 @@ BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_0, 0), BPF_EXIT_INSN(), }, + .errstr_unpriv = "R0 invalid mem access 'inv'", + .result_unpriv = REJECT, .result = ACCEPT, .retval = 2, }, @@ -348,6 +356,8 @@ BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_0, 0), BPF_EXIT_INSN(), }, + .errstr_unpriv = "R0 invalid mem access 'inv'", + .result_unpriv = REJECT, .result = ACCEPT, .retval = 2, }, @@ -416,6 +426,8 @@ BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_0, 0), BPF_EXIT_INSN(), }, + .errstr_unpriv = "R0 invalid mem access 'inv'", + .result_unpriv = REJECT, .result = ACCEPT, .retval = 2, }, @@ -484,6 +496,8 @@ BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_0, 0), BPF_EXIT_INSN(), }, + .errstr_unpriv = "R0 invalid mem access 'inv'", + .result_unpriv = REJECT, .result = ACCEPT, .retval = 2, }, @@ -552,6 +566,8 @@ BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_0, 0), BPF_EXIT_INSN(), }, + .errstr_unpriv = "R0 invalid mem access 'inv'", + .result_unpriv = REJECT, .result = ACCEPT, .retval = 2, }, @@ -620,6 +636,8 @@ BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_0, 0), BPF_EXIT_INSN(), }, + .errstr_unpriv = "R0 invalid mem access 'inv'", + .result_unpriv = REJECT, .result = ACCEPT, .retval = 2, }, @@ -688,6 +706,8 @@ BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_0, 0), BPF_EXIT_INSN(), }, + .errstr_unpriv = "R0 invalid mem access 'inv'", + .result_unpriv = REJECT, .result = ACCEPT, .retval = 2, }, @@ -756,6 +776,8 @@ BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_0, 0), BPF_EXIT_INSN(), }, + .errstr_unpriv = "R0 invalid mem access 'inv'", + .result_unpriv = REJECT, .result = ACCEPT, .retval = 2, }, diff --git a/tools/testing/selftests/bpf/verifier/jset.c b/tools/testing/selftests/bpf/verifier/jset.c index 8dcd4e0383d5..11fc68da735e 100644 --- a/tools/testing/selftests/bpf/verifier/jset.c +++ b/tools/testing/selftests/bpf/verifier/jset.c @@ -82,8 +82,8 @@ BPF_EXIT_INSN(), }, .prog_type = BPF_PROG_TYPE_SOCKET_FILTER, - .retval_unpriv = 1, - .result_unpriv = ACCEPT, + .errstr_unpriv = "R9 !read_ok", + .result_unpriv = REJECT, .retval = 1, .result = ACCEPT, }, @@ -141,7 +141,8 @@ BPF_EXIT_INSN(), }, .prog_type = BPF_PROG_TYPE_SOCKET_FILTER, - .result_unpriv = ACCEPT, + .errstr_unpriv = "R9 !read_ok", + .result_unpriv = REJECT, .result = ACCEPT, }, { @@ -162,6 +163,7 @@ BPF_EXIT_INSN(), }, .prog_type = BPF_PROG_TYPE_SOCKET_FILTER, - .result_unpriv = ACCEPT, + .errstr_unpriv = "R9 !read_ok", + .result_unpriv = REJECT, .result = ACCEPT, }, diff --git a/tools/testing/selftests/bpf/verifier/stack_ptr.c b/tools/testing/selftests/bpf/verifier/stack_ptr.c index 07eaa04412ae..8ab94d65f3d5 100644 --- a/tools/testing/selftests/bpf/verifier/stack_ptr.c +++ b/tools/testing/selftests/bpf/verifier/stack_ptr.c @@ -295,8 +295,6 @@ BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1, 0), BPF_EXIT_INSN(), }, - .result_unpriv = REJECT, - .errstr_unpriv = "invalid write to stack R1 off=0 size=1", .result = ACCEPT, .retval = 42, }, diff --git a/tools/testing/selftests/bpf/verifier/unpriv.c b/tools/testing/selftests/bpf/verifier/unpriv.c index bd436df5cc32..111801aea5e3 100644 --- a/tools/testing/selftests/bpf/verifier/unpriv.c +++ b/tools/testing/selftests/bpf/verifier/unpriv.c @@ -420,6 +420,8 @@ BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_7, 0), BPF_EXIT_INSN(), }, + .errstr_unpriv = "R7 invalid mem access 'inv'", + .result_unpriv = REJECT, .result = ACCEPT, .retval = 0, }, diff --git a/tools/testing/selftests/bpf/verifier/value_ptr_arith.c b/tools/testing/selftests/bpf/verifier/value_ptr_arith.c index e5913fd3b903..a3e593ddfafc 100644 --- a/tools/testing/selftests/bpf/verifier/value_ptr_arith.c +++ b/tools/testing/selftests/bpf/verifier/value_ptr_arith.c @@ -120,7 +120,7 @@ .fixup_map_array_48b = { 1 }, .result = ACCEPT, .result_unpriv = REJECT, - .errstr_unpriv = "R2 tried to add from different maps, paths or scalars", + .errstr_unpriv = "R2 pointer comparison prohibited", .retval = 0, }, { @@ -159,7 +159,8 @@ BPF_MOV64_IMM(BPF_REG_0, 0), BPF_EXIT_INSN(), // fake-dead code; targeted from branch A to - // prevent dead code sanitization + // prevent dead code sanitization, rejected + // via branch B however BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_0, 0), BPF_MOV64_IMM(BPF_REG_0, 0), BPF_EXIT_INSN(), @@ -167,7 +168,7 @@ .fixup_map_array_48b = { 1 }, .result = ACCEPT, .result_unpriv = REJECT, - .errstr_unpriv = "R2 tried to add from different maps, paths or scalars", + .errstr_unpriv = "R0 invalid mem access 'inv'", .retval = 0, }, { @@ -300,8 +301,6 @@ }, .fixup_map_array_48b = { 3 }, .result = ACCEPT, - .result_unpriv = REJECT, - .errstr_unpriv = "R0 pointer arithmetic of map value goes out of range", .retval = 1, }, { @@ -371,8 +370,6 @@ }, .fixup_map_array_48b = { 3 }, .result = ACCEPT, - .result_unpriv = REJECT, - .errstr_unpriv = "R0 pointer arithmetic of map value goes out of range", .retval = 1, }, { @@ -472,8 +469,6 @@ }, .fixup_map_array_48b = { 3 }, .result = ACCEPT, - .result_unpriv = REJECT, - .errstr_unpriv = "R0 pointer arithmetic of map value goes out of range", .retval = 1, }, { @@ -766,8 +761,6 @@ }, .fixup_map_array_48b = { 3 }, .result = ACCEPT, - .result_unpriv = REJECT, - .errstr_unpriv = "R0 pointer arithmetic of map value goes out of range", .retval = 1, }, { diff --git a/tools/testing/selftests/bpf/xdp_redirect_multi.c b/tools/testing/selftests/bpf/xdp_redirect_multi.c new file mode 100644 index 000000000000..3696a8f32c23 --- /dev/null +++ b/tools/testing/selftests/bpf/xdp_redirect_multi.c @@ -0,0 +1,226 @@ +// SPDX-License-Identifier: GPL-2.0 +#include <linux/bpf.h> +#include <linux/if_link.h> +#include <assert.h> +#include <errno.h> +#include <signal.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <net/if.h> +#include <unistd.h> +#include <libgen.h> +#include <sys/resource.h> +#include <sys/ioctl.h> +#include <sys/types.h> +#include <sys/socket.h> +#include <netinet/in.h> + +#include "bpf_util.h" +#include <bpf/bpf.h> +#include <bpf/libbpf.h> + +#define MAX_IFACE_NUM 32 +#define MAX_INDEX_NUM 1024 + +static __u32 xdp_flags = XDP_FLAGS_UPDATE_IF_NOEXIST; +static int ifaces[MAX_IFACE_NUM] = {}; + +static void int_exit(int sig) +{ + __u32 prog_id = 0; + int i; + + for (i = 0; ifaces[i] > 0; i++) { + if (bpf_get_link_xdp_id(ifaces[i], &prog_id, xdp_flags)) { + printf("bpf_get_link_xdp_id failed\n"); + exit(1); + } + if (prog_id) + bpf_set_link_xdp_fd(ifaces[i], -1, xdp_flags); + } + + exit(0); +} + +static int get_mac_addr(unsigned int ifindex, void *mac_addr) +{ + char ifname[IF_NAMESIZE]; + struct ifreq ifr; + int fd, ret = -1; + + fd = socket(AF_INET, SOCK_DGRAM, 0); + if (fd < 0) + return ret; + + if (!if_indextoname(ifindex, ifname)) + goto err_out; + + strcpy(ifr.ifr_name, ifname); + + if (ioctl(fd, SIOCGIFHWADDR, &ifr) != 0) + goto err_out; + + memcpy(mac_addr, ifr.ifr_hwaddr.sa_data, 6 * sizeof(char)); + ret = 0; + +err_out: + close(fd); + return ret; +} + +static void usage(const char *prog) +{ + fprintf(stderr, + "usage: %s [OPTS] <IFNAME|IFINDEX> <IFNAME|IFINDEX> ...\n" + "OPTS:\n" + " -S use skb-mode\n" + " -N enforce native mode\n" + " -F force loading prog\n" + " -X load xdp program on egress\n", + prog); +} + +int main(int argc, char **argv) +{ + int prog_fd, group_all, mac_map; + struct bpf_program *ingress_prog, *egress_prog; + struct bpf_prog_load_attr prog_load_attr = { + .prog_type = BPF_PROG_TYPE_UNSPEC, + }; + int i, ret, opt, egress_prog_fd = 0; + struct bpf_devmap_val devmap_val; + bool attach_egress_prog = false; + unsigned char mac_addr[6]; + char ifname[IF_NAMESIZE]; + struct bpf_object *obj; + unsigned int ifindex; + char filename[256]; + + while ((opt = getopt(argc, argv, "SNFX")) != -1) { + switch (opt) { + case 'S': + xdp_flags |= XDP_FLAGS_SKB_MODE; + break; + case 'N': + /* default, set below */ + break; + case 'F': + xdp_flags &= ~XDP_FLAGS_UPDATE_IF_NOEXIST; + break; + case 'X': + attach_egress_prog = true; + break; + default: + usage(basename(argv[0])); + return 1; + } + } + + if (!(xdp_flags & XDP_FLAGS_SKB_MODE)) { + xdp_flags |= XDP_FLAGS_DRV_MODE; + } else if (attach_egress_prog) { + printf("Load xdp program on egress with SKB mode not supported yet\n"); + goto err_out; + } + + if (optind == argc) { + printf("usage: %s <IFNAME|IFINDEX> <IFNAME|IFINDEX> ...\n", argv[0]); + goto err_out; + } + + printf("Get interfaces"); + for (i = 0; i < MAX_IFACE_NUM && argv[optind + i]; i++) { + ifaces[i] = if_nametoindex(argv[optind + i]); + if (!ifaces[i]) + ifaces[i] = strtoul(argv[optind + i], NULL, 0); + if (!if_indextoname(ifaces[i], ifname)) { + perror("Invalid interface name or i"); + goto err_out; + } + if (ifaces[i] > MAX_INDEX_NUM) { + printf("Interface index to large\n"); + goto err_out; + } + printf(" %d", ifaces[i]); + } + printf("\n"); + + snprintf(filename, sizeof(filename), "%s_kern.o", argv[0]); + prog_load_attr.file = filename; + + if (bpf_prog_load_xattr(&prog_load_attr, &obj, &prog_fd)) + goto err_out; + + if (attach_egress_prog) + group_all = bpf_object__find_map_fd_by_name(obj, "map_egress"); + else + group_all = bpf_object__find_map_fd_by_name(obj, "map_all"); + mac_map = bpf_object__find_map_fd_by_name(obj, "mac_map"); + + if (group_all < 0 || mac_map < 0) { + printf("bpf_object__find_map_fd_by_name failed\n"); + goto err_out; + } + + if (attach_egress_prog) { + /* Find ingress/egress prog for 2nd xdp prog */ + ingress_prog = bpf_object__find_program_by_name(obj, "xdp_redirect_map_all_prog"); + egress_prog = bpf_object__find_program_by_name(obj, "xdp_devmap_prog"); + if (!ingress_prog || !egress_prog) { + printf("finding ingress/egress_prog in obj file failed\n"); + goto err_out; + } + prog_fd = bpf_program__fd(ingress_prog); + egress_prog_fd = bpf_program__fd(egress_prog); + if (prog_fd < 0 || egress_prog_fd < 0) { + printf("find egress_prog fd failed\n"); + goto err_out; + } + } + + signal(SIGINT, int_exit); + signal(SIGTERM, int_exit); + + /* Init forward multicast groups and exclude group */ + for (i = 0; ifaces[i] > 0; i++) { + ifindex = ifaces[i]; + + if (attach_egress_prog) { + ret = get_mac_addr(ifindex, mac_addr); + if (ret < 0) { + printf("get interface %d mac failed\n", ifindex); + goto err_out; + } + ret = bpf_map_update_elem(mac_map, &ifindex, mac_addr, 0); + if (ret) { + perror("bpf_update_elem mac_map failed\n"); + goto err_out; + } + } + + /* Add all the interfaces to group all */ + devmap_val.ifindex = ifindex; + devmap_val.bpf_prog.fd = egress_prog_fd; + ret = bpf_map_update_elem(group_all, &ifindex, &devmap_val, 0); + if (ret) { + perror("bpf_map_update_elem"); + goto err_out; + } + + /* bind prog_fd to each interface */ + ret = bpf_set_link_xdp_fd(ifindex, prog_fd, xdp_flags); + if (ret) { + printf("Set xdp fd failed on %d\n", ifindex); + goto err_out; + } + } + + /* sleep some time for testing */ + sleep(999); + + return 0; + +err_out: + return 1; +} diff --git a/tools/testing/selftests/drivers/net/mlxsw/devlink_trap_l3_drops.sh b/tools/testing/selftests/drivers/net/mlxsw/devlink_trap_l3_drops.sh index 4029833f7e27..160891dcb4bc 100755 --- a/tools/testing/selftests/drivers/net/mlxsw/devlink_trap_l3_drops.sh +++ b/tools/testing/selftests/drivers/net/mlxsw/devlink_trap_l3_drops.sh @@ -109,6 +109,9 @@ router_destroy() __addr_add_del $rp1 del 192.0.2.2/24 2001:db8:1::2/64 tc qdisc del dev $rp2 clsact + + ip link set dev $rp2 down + ip link set dev $rp1 down } setup_prepare() diff --git a/tools/testing/selftests/drivers/net/mlxsw/devlink_trap_l3_exceptions.sh b/tools/testing/selftests/drivers/net/mlxsw/devlink_trap_l3_exceptions.sh index 42d44e27802c..190c1b6b5365 100755 --- a/tools/testing/selftests/drivers/net/mlxsw/devlink_trap_l3_exceptions.sh +++ b/tools/testing/selftests/drivers/net/mlxsw/devlink_trap_l3_exceptions.sh @@ -111,6 +111,9 @@ router_destroy() __addr_add_del $rp1 del 192.0.2.2/24 2001:db8:1::2/64 tc qdisc del dev $rp2 clsact + + ip link set dev $rp2 down + ip link set dev $rp1 down } setup_prepare() diff --git a/tools/testing/selftests/drivers/net/mlxsw/port_scale.sh b/tools/testing/selftests/drivers/net/mlxsw/port_scale.sh index 65f43a7ce9c9..1e9a4aff76a2 100644 --- a/tools/testing/selftests/drivers/net/mlxsw/port_scale.sh +++ b/tools/testing/selftests/drivers/net/mlxsw/port_scale.sh @@ -7,6 +7,8 @@ PORT_NUM_NETIFS=0 +declare -a unsplit + port_setup_prepare() { : @@ -20,12 +22,12 @@ port_cleanup() devlink port unsplit $port check_err $? "Did not unsplit $netdev" done + unsplit=() } split_all_ports() { local should_fail=$1; shift - local -a unsplit # Loop over the splittable netdevs and create tuples of netdev along # with its width. For example: diff --git a/tools/testing/selftests/drivers/net/mlxsw/qos_dscp_bridge.sh b/tools/testing/selftests/drivers/net/mlxsw/qos_dscp_bridge.sh index 5cbff8038f84..28a570006d4d 100755 --- a/tools/testing/selftests/drivers/net/mlxsw/qos_dscp_bridge.sh +++ b/tools/testing/selftests/drivers/net/mlxsw/qos_dscp_bridge.sh @@ -93,7 +93,9 @@ switch_destroy() lldptool -T -i $swp1 -V APP -d $(dscp_map 10) >/dev/null lldpad_app_wait_del + ip link set dev $swp2 down ip link set dev $swp2 nomaster + ip link set dev $swp1 down ip link set dev $swp1 nomaster ip link del dev br1 } diff --git a/tools/testing/selftests/drivers/net/mlxsw/qos_headroom.sh b/tools/testing/selftests/drivers/net/mlxsw/qos_headroom.sh index 27de3d9ed08e..f4493ef9cca1 100755 --- a/tools/testing/selftests/drivers/net/mlxsw/qos_headroom.sh +++ b/tools/testing/selftests/drivers/net/mlxsw/qos_headroom.sh @@ -29,37 +29,38 @@ cleanup() get_prio_pg() { - __mlnx_qos -i $swp | sed -n '/^PFC/,/^[^[:space:]]/p' | - grep buffer | sed 's/ \+/ /g' | cut -d' ' -f 2- + # Produces a string of numbers "<B0> <B1> ... <B7> ", where BX is number + # of buffer that priority X is mapped to. + dcb -j buffer show dev $swp | + jq -r '[.prio_buffer | .[] | tostring + " "] | add' } get_prio_pfc() { - __mlnx_qos -i $swp | sed -n '/^PFC/,/^[^[:space:]]/p' | - grep enabled | sed 's/ \+/ /g' | cut -d' ' -f 2- + # Produces a string of numbers "<P0> <P1> ... <P7> ", where PX denotes + # whether priority X has PFC enabled (the value is 1) or disabled (0). + dcb -j pfc show dev $swp | + jq -r '[.prio_pfc | .[] | if . then "1 " else "0 " end] | add' } get_prio_tc() { - __mlnx_qos -i $swp | sed -n '/^tc/,$p' | - awk '/^tc/ { TC = $2 } - /priority:/ { PRIO[$2]=TC } - END { - for (i in PRIO) - printf("%d ", PRIO[i]) - }' + # Produces a string of numbers "<T0> <T1> ... <T7> ", where TC is number + # of TC that priority X is mapped to. + dcb -j ets show dev $swp | + jq -r '[.prio_tc | .[] | tostring + " "] | add' } get_buf_size() { local idx=$1; shift - __mlnx_qos -i $swp | grep Receive | sed 's/.*: //' | cut -d, -f $((idx + 1)) + dcb -j buffer show dev $swp | jq ".buffer_size[$idx]" } get_tot_size() { - __mlnx_qos -i $swp | grep Receive | sed 's/.*total_size=//' + dcb -j buffer show dev $swp | jq '.total_size' } check_prio_pg() @@ -121,18 +122,18 @@ test_dcb_ets() { RET=0 - __mlnx_qos -i $swp --prio_tc=0,2,4,6,1,3,5,7 > /dev/null + dcb ets set dev $swp prio-tc 0:0 1:2 2:4 3:6 4:1 5:3 6:5 7:7 check_prio_pg "0 2 4 6 1 3 5 7 " check_prio_tc "0 2 4 6 1 3 5 7 " check_prio_pfc "0 0 0 0 0 0 0 0 " - __mlnx_qos -i $swp --prio_tc=0,0,0,0,0,0,0,0 > /dev/null + dcb ets set dev $swp prio-tc all:0 check_prio_pg "0 0 0 0 0 0 0 0 " check_prio_tc "0 0 0 0 0 0 0 0 " - __mlnx_qos -i $swp --prio2buffer=1,3,5,7,0,2,4,6 &> /dev/null + dcb buffer set dev $swp prio-buffer 0:1 1:3 2:5 3:7 4:0 5:2 6:4 7:6 2>/dev/null check_fail $? "prio2buffer accepted in DCB mode" log_test "Configuring headroom through ETS" @@ -174,7 +175,7 @@ test_pfc() { RET=0 - __mlnx_qos -i $swp --prio_tc=0,0,0,0,0,1,2,3 > /dev/null + dcb ets set dev $swp prio-tc all:0 5:1 6:2 7:3 local buf0size=$(get_buf_size 0) local buf1size=$(get_buf_size 1) @@ -193,7 +194,7 @@ test_pfc() RET=0 - __mlnx_qos -i $swp --pfc=0,0,0,0,0,1,1,1 --cable_len=0 > /dev/null + dcb pfc set dev $swp prio-pfc all:off 5:on 6:on 7:on delay 0 check_prio_pg "0 0 0 0 0 1 2 3 " check_prio_pfc "0 0 0 0 0 1 1 1 " @@ -210,7 +211,7 @@ test_pfc() RET=0 - __mlnx_qos -i $swp --pfc=0,0,0,0,0,1,1,1 --cable_len=1000 > /dev/null + dcb pfc set dev $swp delay 1000 check_buf_size 0 "== $buf0size" check_buf_size 1 "> $buf1size" @@ -221,8 +222,8 @@ test_pfc() RET=0 - __mlnx_qos -i $swp --pfc=0,0,0,0,0,0,0,0 --cable_len=0 > /dev/null - __mlnx_qos -i $swp --prio_tc=0,0,0,0,0,0,0,0 > /dev/null + dcb pfc set dev $swp prio-pfc all:off delay 0 + dcb ets set dev $swp prio-tc all:0 check_prio_pg "0 0 0 0 0 0 0 0 " check_prio_tc "0 0 0 0 0 0 0 0 " @@ -242,13 +243,13 @@ test_tc_priomap() { RET=0 - __mlnx_qos -i $swp --prio_tc=0,1,2,3,4,5,6,7 > /dev/null + dcb ets set dev $swp prio-tc 0:0 1:1 2:2 3:3 4:4 5:5 6:6 7:7 check_prio_pg "0 1 2 3 4 5 6 7 " tc qdisc replace dev $swp root handle 1: bfifo limit 1.5M check_prio_pg "0 0 0 0 0 0 0 0 " - __mlnx_qos -i $swp --prio2buffer=1,3,5,7,0,2,4,6 > /dev/null + dcb buffer set dev $swp prio-buffer 0:1 1:3 2:5 3:7 4:0 5:2 6:4 7:6 check_prio_pg "1 3 5 7 0 2 4 6 " tc qdisc delete dev $swp root @@ -256,9 +257,9 @@ test_tc_priomap() # Clean up. tc qdisc replace dev $swp root handle 1: bfifo limit 1.5M - __mlnx_qos -i $swp --prio2buffer=0,0,0,0,0,0,0,0 > /dev/null + dcb buffer set dev $swp prio-buffer all:0 tc qdisc delete dev $swp root - __mlnx_qos -i $swp --prio_tc=0,0,0,0,0,0,0,0 > /dev/null + dcb ets set dev $swp prio-tc all:0 log_test "TC: priomap" } @@ -270,12 +271,12 @@ test_tc_sizes() RET=0 - __mlnx_qos -i $swp --buffer_size=$size,0,0,0,0,0,0,0 &> /dev/null + dcb buffer set dev $swp buffer-size all:0 0:$size 2>/dev/null check_fail $? "buffer_size should fail before qdisc is added" tc qdisc replace dev $swp root handle 1: bfifo limit 1.5M - __mlnx_qos -i $swp --buffer_size=$size,0,0,0,0,0,0,0 > /dev/null + dcb buffer set dev $swp buffer-size all:0 0:$size check_err $? "buffer_size should pass after qdisc is added" check_buf_size 0 "== $size" "set size: " @@ -283,26 +284,26 @@ test_tc_sizes() check_buf_size 0 "== $size" "set MTU: " mtu_restore $swp - __mlnx_qos -i $swp --buffer_size=0,0,0,0,0,0,0,0 > /dev/null + dcb buffer set dev $swp buffer-size all:0 # After replacing the qdisc for the same kind, buffer_size still has to # work. tc qdisc replace dev $swp root handle 1: bfifo limit 1M - __mlnx_qos -i $swp --buffer_size=$size,0,0,0,0,0,0,0 > /dev/null + dcb buffer set dev $swp buffer-size all:0 0:$size check_buf_size 0 "== $size" "post replace, set size: " - __mlnx_qos -i $swp --buffer_size=0,0,0,0,0,0,0,0 > /dev/null + dcb buffer set dev $swp buffer-size all:0 # Likewise after replacing for a different kind. tc qdisc replace dev $swp root handle 2: prio bands 8 - __mlnx_qos -i $swp --buffer_size=$size,0,0,0,0,0,0,0 > /dev/null + dcb buffer set dev $swp buffer-size all:0 0:$size check_buf_size 0 "== $size" "post replace different kind, set size: " tc qdisc delete dev $swp root - __mlnx_qos -i $swp --buffer_size=$size,0,0,0,0,0,0,0 &> /dev/null + dcb buffer set dev $swp buffer-size all:0 0:$size 2>/dev/null check_fail $? "buffer_size should fail after qdisc is deleted" log_test "TC: buffer size" @@ -363,10 +364,10 @@ test_tc_int_buf() tc qdisc replace dev $swp root handle 1: bfifo limit 1.5M test_int_buf "TC: " - __mlnx_qos -i $swp --buffer_size=$size,0,0,0,0,0,0,0 > /dev/null + dcb buffer set dev $swp buffer-size all:0 0:$size test_int_buf "TC+buffsize: " - __mlnx_qos -i $swp --buffer_size=0,0,0,0,0,0,0,0 > /dev/null + dcb buffer set dev $swp buffer-size all:0 tc qdisc delete dev $swp root } diff --git a/tools/testing/selftests/drivers/net/mlxsw/qos_lib.sh b/tools/testing/selftests/drivers/net/mlxsw/qos_lib.sh index 0bf76f13c030..faa51012cdac 100644 --- a/tools/testing/selftests/drivers/net/mlxsw/qos_lib.sh +++ b/tools/testing/selftests/drivers/net/mlxsw/qos_lib.sh @@ -82,17 +82,3 @@ bail_on_lldpad() fi fi } - -__mlnx_qos() -{ - local err - - mlnx_qos "$@" 2>/dev/null - err=$? - - if ((err)); then - echo "Error ($err) in mlnx_qos $@" >/dev/stderr - fi - - return $err -} diff --git a/tools/testing/selftests/drivers/net/mlxsw/qos_pfc.sh b/tools/testing/selftests/drivers/net/mlxsw/qos_pfc.sh index 5c7700212f75..5d5622fc2758 100755 --- a/tools/testing/selftests/drivers/net/mlxsw/qos_pfc.sh +++ b/tools/testing/selftests/drivers/net/mlxsw/qos_pfc.sh @@ -171,7 +171,7 @@ switch_create() # assignment. tc qdisc replace dev $swp1 root handle 1: \ ets bands 8 strict 8 priomap 7 6 - __mlnx_qos -i $swp1 --prio2buffer=0,1,0,0,0,0,0,0 >/dev/null + dcb buffer set dev $swp1 prio-buffer all:0 1:1 # $swp2 # ----- @@ -209,8 +209,8 @@ switch_create() # the lossless prio into a buffer of its own. Don't bother with buffer # sizes though, there is not going to be any pressure in the "backward" # direction. - __mlnx_qos -i $swp3 --prio2buffer=0,1,0,0,0,0,0,0 >/dev/null - __mlnx_qos -i $swp3 --pfc=0,1,0,0,0,0,0,0 >/dev/null + dcb buffer set dev $swp3 prio-buffer all:0 1:1 + dcb pfc set dev $swp3 prio-pfc all:off 1:on # $swp4 # ----- @@ -226,11 +226,11 @@ switch_create() # Configure qdisc so that we can hand-tune headroom. tc qdisc replace dev $swp4 root handle 1: \ ets bands 8 strict 8 priomap 7 6 - __mlnx_qos -i $swp4 --prio2buffer=0,1,0,0,0,0,0,0 >/dev/null - __mlnx_qos -i $swp4 --pfc=0,1,0,0,0,0,0,0 >/dev/null + dcb buffer set dev $swp4 prio-buffer all:0 1:1 + dcb pfc set dev $swp4 prio-pfc all:off 1:on # PG0 will get autoconfigured to Xoff, give PG1 arbitrarily 100K, which # is (-2*MTU) about 80K of delay provision. - __mlnx_qos -i $swp4 --buffer_size=0,$_100KB,0,0,0,0,0,0 >/dev/null + dcb buffer set dev $swp4 buffer-size all:0 1:$_100KB # bridges # ------- @@ -273,9 +273,9 @@ switch_destroy() # $swp4 # ----- - __mlnx_qos -i $swp4 --buffer_size=0,0,0,0,0,0,0,0 >/dev/null - __mlnx_qos -i $swp4 --pfc=0,0,0,0,0,0,0,0 >/dev/null - __mlnx_qos -i $swp4 --prio2buffer=0,0,0,0,0,0,0,0 >/dev/null + dcb buffer set dev $swp4 buffer-size all:0 + dcb pfc set dev $swp4 prio-pfc all:off + dcb buffer set dev $swp4 prio-buffer all:0 tc qdisc del dev $swp4 root devlink_tc_bind_pool_th_restore $swp4 1 ingress @@ -288,8 +288,8 @@ switch_destroy() # $swp3 # ----- - __mlnx_qos -i $swp3 --pfc=0,0,0,0,0,0,0,0 >/dev/null - __mlnx_qos -i $swp3 --prio2buffer=0,0,0,0,0,0,0,0 >/dev/null + dcb pfc set dev $swp3 prio-pfc all:off + dcb buffer set dev $swp3 prio-buffer all:0 tc qdisc del dev $swp3 root devlink_tc_bind_pool_th_restore $swp3 1 egress @@ -315,7 +315,7 @@ switch_destroy() # $swp1 # ----- - __mlnx_qos -i $swp1 --prio2buffer=0,0,0,0,0,0,0,0 >/dev/null + dcb buffer set dev $swp1 prio-buffer all:0 tc qdisc del dev $swp1 root devlink_tc_bind_pool_th_restore $swp1 1 ingress diff --git a/tools/testing/selftests/drivers/net/mlxsw/router_scale.sh b/tools/testing/selftests/drivers/net/mlxsw/router_scale.sh index e93878d42596..683759d29199 100644 --- a/tools/testing/selftests/drivers/net/mlxsw/router_scale.sh +++ b/tools/testing/selftests/drivers/net/mlxsw/router_scale.sh @@ -68,7 +68,7 @@ wait_for_routes() local t0=$1; shift local route_count=$1; shift - local t1=$(ip route | grep -o 'offload' | wc -l) + local t1=$(ip route | grep 'offload' | grep -v 'offload_failed' | wc -l) local delta=$((t1 - t0)) echo $delta [[ $delta -ge $route_count ]] diff --git a/tools/testing/selftests/drivers/net/mlxsw/tc_sample.sh b/tools/testing/selftests/drivers/net/mlxsw/tc_sample.sh index 093bed088ad0..373d5f2a846e 100755 --- a/tools/testing/selftests/drivers/net/mlxsw/tc_sample.sh +++ b/tools/testing/selftests/drivers/net/mlxsw/tc_sample.sh @@ -234,15 +234,15 @@ __tc_sample_rate_test() psample_capture_start - ip vrf exec v$h1 $MZ $h1 -c 3200 -d 1msec -p 64 -A 192.0.2.1 \ + ip vrf exec v$h1 $MZ $h1 -c 320000 -d 100usec -p 64 -A 192.0.2.1 \ -B $dip -t udp dp=52768,sp=42768 -q psample_capture_stop pkts=$(grep -e "group 1 " $CAPTURE_FILE | wc -l) - pct=$((100 * (pkts - 100) / 100)) + pct=$((100 * (pkts - 10000) / 10000)) (( -25 <= pct && pct <= 25)) - check_err $? "Expected 100 packets, got $pkts packets, which is $pct% off. Required accuracy is +-25%" + check_err $? "Expected 10000 packets, got $pkts packets, which is $pct% off. Required accuracy is +-25%" log_test "tc sample rate ($desc)" @@ -587,15 +587,15 @@ __tc_sample_acl_rate_test() psample_capture_start - ip vrf exec v$h1 $MZ $h1 -c 3200 -d 1msec -p 64 -A 192.0.2.1 \ + ip vrf exec v$h1 $MZ $h1 -c 320000 -d 100usec -p 64 -A 192.0.2.1 \ -B 198.51.100.1 -t udp dp=52768,sp=42768 -q psample_capture_stop pkts=$(grep -e "group 1 " $CAPTURE_FILE | wc -l) - pct=$((100 * (pkts - 100) / 100)) + pct=$((100 * (pkts - 10000) / 10000)) (( -25 <= pct && pct <= 25)) - check_err $? "Expected 100 packets, got $pkts packets, which is $pct% off. Required accuracy is +-25%" + check_err $? "Expected 10000 packets, got $pkts packets, which is $pct% off. Required accuracy is +-25%" # Setup a filter that should not match any packet and make sure packets # are not sampled. diff --git a/tools/testing/selftests/drivers/net/netdevsim/devlink.sh b/tools/testing/selftests/drivers/net/netdevsim/devlink.sh index 40909c254365..9de1d123f4f5 100755 --- a/tools/testing/selftests/drivers/net/netdevsim/devlink.sh +++ b/tools/testing/selftests/drivers/net/netdevsim/devlink.sh @@ -5,12 +5,13 @@ lib_dir=$(dirname $0)/../../../net/forwarding ALL_TESTS="fw_flash_test params_test regions_test reload_test \ netns_reload_test resource_test dev_info_test \ - empty_reporter_test dummy_reporter_test" + empty_reporter_test dummy_reporter_test rate_test" NUM_NETIFS=0 source $lib_dir/lib.sh BUS_ADDR=10 PORT_COUNT=4 +VF_COUNT=4 DEV_NAME=netdevsim$BUS_ADDR SYSFS_NET_DIR=/sys/bus/netdevsim/devices/$DEV_NAME/net/ DEBUGFS_DIR=/sys/kernel/debug/netdevsim/$DEV_NAME/ @@ -507,6 +508,170 @@ dummy_reporter_test() log_test "dummy reporter test" } +rate_leafs_get() +{ + local handle=$1 + + cmd_jq "devlink port function rate show -j" \ + '.[] | to_entries | .[] | select(.value.type == "leaf") | .key | select(contains("'$handle'"))' +} + +rate_nodes_get() +{ + local handle=$1 + + cmd_jq "devlink port function rate show -j" \ + '.[] | to_entries | .[] | select(.value.type == "node") | .key | select(contains("'$handle'"))' +} + +rate_attr_set() +{ + local handle=$1 + local name=$2 + local value=$3 + local units=$4 + + devlink port function rate set $handle $name $value$units +} + +rate_attr_get() +{ + local handle=$1 + local name=$2 + + cmd_jq "devlink port function rate show $handle -j" '.[][].'$name +} + +rate_attr_tx_rate_check() +{ + local handle=$1 + local name=$2 + local rate=$3 + local debug_file=$4 + + rate_attr_set $handle $name $rate mbit + check_err $? "Failed to set $name value" + + local debug_value=$(cat $debug_file) + check_err $? "Failed to read $name value from debugfs" + [ "$debug_value" == "$rate" ] + check_err $? "Unexpected $name debug value $debug_value != $rate" + + local api_value=$(( $(rate_attr_get $handle $name) * 8 / 1000000 )) + check_err $? "Failed to get $name attr value" + [ "$api_value" == "$rate" ] + check_err $? "Unexpected $name attr value $api_value != $rate" +} + +rate_attr_parent_check() +{ + local handle=$1 + local parent=$2 + local debug_file=$3 + + rate_attr_set $handle parent $parent + check_err $? "Failed to set parent" + + debug_value=$(cat $debug_file) + check_err $? "Failed to get parent debugfs value" + [ "$debug_value" == "$parent" ] + check_err $? "Unexpected parent debug value $debug_value != $parent" + + api_value=$(rate_attr_get $r_obj parent) + check_err $? "Failed to get parent attr value" + [ "$api_value" == "$parent" ] + check_err $? "Unexpected parent attr value $api_value != $parent" +} + +rate_node_add() +{ + local handle=$1 + + devlink port function rate add $handle +} + +rate_node_del() +{ + local handle=$1 + + devlink port function rate del $handle +} + +rate_test() +{ + RET=0 + + echo $VF_COUNT > /sys/bus/netdevsim/devices/$DEV_NAME/sriov_numvfs + devlink dev eswitch set $DL_HANDLE mode switchdev + local leafs=`rate_leafs_get $DL_HANDLE` + local num_leafs=`echo $leafs | wc -w` + [ "$num_leafs" == "$VF_COUNT" ] + check_err $? "Expected $VF_COUNT rate leafs but got $num_leafs" + + rate=10 + for r_obj in $leafs + do + rate_attr_tx_rate_check $r_obj tx_share $rate \ + $DEBUGFS_DIR/ports/${r_obj##*/}/tx_share + rate=$(($rate+10)) + done + + rate=100 + for r_obj in $leafs + do + rate_attr_tx_rate_check $r_obj tx_max $rate \ + $DEBUGFS_DIR/ports/${r_obj##*/}/tx_max + rate=$(($rate+100)) + done + + local node1_name='group1' + local node1="$DL_HANDLE/$node1_name" + rate_node_add "$node1" + check_err $? "Failed to add node $node1" + + local num_nodes=`rate_nodes_get $DL_HANDLE | wc -w` + [ $num_nodes == 1 ] + check_err $? "Expected 1 rate node in output but got $num_nodes" + + local node_tx_share=10 + rate_attr_tx_rate_check $node1 tx_share $node_tx_share \ + $DEBUGFS_DIR/rate_nodes/${node1##*/}/tx_share + + local node_tx_max=100 + rate_attr_tx_rate_check $node1 tx_max $node_tx_max \ + $DEBUGFS_DIR/rate_nodes/${node1##*/}/tx_max + + rate_node_del "$node1" + check_err $? "Failed to delete node $node1" + local num_nodes=`rate_nodes_get $DL_HANDLE | wc -w` + [ $num_nodes == 0 ] + check_err $? "Expected 0 rate node but got $num_nodes" + + local node1_name='group1' + local node1="$DL_HANDLE/$node1_name" + rate_node_add "$node1" + check_err $? "Failed to add node $node1" + + rate_attr_parent_check $r_obj $node1_name \ + $DEBUGFS_DIR/ports/${r_obj##*/}/rate_parent + + local node2_name='group2' + local node2="$DL_HANDLE/$node2_name" + rate_node_add "$node2" + check_err $? "Failed to add node $node2" + + rate_attr_parent_check $node2 $node1_name \ + $DEBUGFS_DIR/rate_nodes/$node2_name/rate_parent + rate_node_del "$node2" + check_err $? "Failed to delete node $node2" + rate_attr_set "$r_obj" noparent + check_err $? "Failed to unset $r_obj parent node" + rate_node_del "$node1" + check_err $? "Failed to delete node $node1" + + log_test "rate test" +} + setup_prepare() { modprobe netdevsim diff --git a/tools/testing/selftests/drivers/net/netdevsim/devlink_trap.sh b/tools/testing/selftests/drivers/net/netdevsim/devlink_trap.sh index da49ad2761b5..109900c817be 100755 --- a/tools/testing/selftests/drivers/net/netdevsim/devlink_trap.sh +++ b/tools/testing/selftests/drivers/net/netdevsim/devlink_trap.sh @@ -24,13 +24,15 @@ ALL_TESTS=" NETDEVSIM_PATH=/sys/bus/netdevsim/ DEV_ADDR=1337 DEV=netdevsim${DEV_ADDR} -DEVLINK_DEV=netdevsim/${DEV} DEBUGFS_DIR=/sys/kernel/debug/netdevsim/$DEV/ SLEEP_TIME=1 NETDEV="" NUM_NETIFS=0 source $lib_dir/lib.sh + +DEVLINK_DEV= source $lib_dir/devlink_lib.sh +DEVLINK_DEV=netdevsim/${DEV} require_command udevadm @@ -163,6 +165,16 @@ trap_stats_test() devlink_trap_action_set $trap_name "drop" devlink_trap_stats_idle_test $trap_name check_err $? "Stats of trap $trap_name not idle when action is drop" + + echo "y"> $DEBUGFS_DIR/fail_trap_drop_counter_get + devlink -s trap show $DEVLINK_DEV trap $trap_name &> /dev/null + check_fail $? "Managed to read trap (hard dropped) statistics when should not" + echo "n"> $DEBUGFS_DIR/fail_trap_drop_counter_get + devlink -s trap show $DEVLINK_DEV trap $trap_name &> /dev/null + check_err $? "Did not manage to read trap (hard dropped) statistics when should" + + devlink_trap_drop_stats_idle_test $trap_name + check_fail $? "Drop stats of trap $trap_name idle when should not" else devlink_trap_stats_idle_test $trap_name check_fail $? "Stats of non-drop trap $trap_name idle when should not" diff --git a/tools/testing/selftests/drivers/net/netdevsim/fib.sh b/tools/testing/selftests/drivers/net/netdevsim/fib.sh index 251f228ce63e..fc794cd30389 100755 --- a/tools/testing/selftests/drivers/net/netdevsim/fib.sh +++ b/tools/testing/selftests/drivers/net/netdevsim/fib.sh @@ -33,13 +33,15 @@ ALL_TESTS=" NETDEVSIM_PATH=/sys/bus/netdevsim/ DEV_ADDR=1337 DEV=netdevsim${DEV_ADDR} -DEVLINK_DEV=netdevsim/${DEV} SYSFS_NET_DIR=/sys/bus/netdevsim/devices/$DEV/net/ NUM_NETIFS=0 source $lib_dir/lib.sh -source $lib_dir/devlink_lib.sh source $lib_dir/fib_offload_lib.sh +DEVLINK_DEV= +source $lib_dir/devlink_lib.sh +DEVLINK_DEV=netdevsim/${DEV} + ipv4_identical_routes() { fib_ipv4_identical_routes_test "testns1" diff --git a/tools/testing/selftests/drivers/net/netdevsim/nexthop.sh b/tools/testing/selftests/drivers/net/netdevsim/nexthop.sh index ba75c81cda91..e8e0dc088d6a 100755 --- a/tools/testing/selftests/drivers/net/netdevsim/nexthop.sh +++ b/tools/testing/selftests/drivers/net/netdevsim/nexthop.sh @@ -44,12 +44,14 @@ ALL_TESTS=" NETDEVSIM_PATH=/sys/bus/netdevsim/ DEV_ADDR=1337 DEV=netdevsim${DEV_ADDR} -DEVLINK_DEV=netdevsim/${DEV} SYSFS_NET_DIR=/sys/bus/netdevsim/devices/$DEV/net/ DEBUGFS_NET_DIR=/sys/kernel/debug/netdevsim/$DEV/ NUM_NETIFS=0 source $lib_dir/lib.sh + +DEVLINK_DEV= source $lib_dir/devlink_lib.sh +DEVLINK_DEV=netdevsim/${DEV} nexthop_check() { diff --git a/tools/testing/selftests/drivers/net/netdevsim/psample.sh b/tools/testing/selftests/drivers/net/netdevsim/psample.sh index ee10b1a8933c..e689ff7a0b12 100755 --- a/tools/testing/selftests/drivers/net/netdevsim/psample.sh +++ b/tools/testing/selftests/drivers/net/netdevsim/psample.sh @@ -14,13 +14,15 @@ ALL_TESTS=" NETDEVSIM_PATH=/sys/bus/netdevsim/ DEV_ADDR=1337 DEV=netdevsim${DEV_ADDR} -DEVLINK_DEV=netdevsim/${DEV} SYSFS_NET_DIR=/sys/bus/netdevsim/devices/$DEV/net/ PSAMPLE_DIR=/sys/kernel/debug/netdevsim/$DEV/psample/ CAPTURE_FILE=$(mktemp) NUM_NETIFS=0 source $lib_dir/lib.sh + +DEVLINK_DEV= source $lib_dir/devlink_lib.sh +DEVLINK_DEV=netdevsim/${DEV} # Available at https://github.com/Mellanox/libpsample require_command psample diff --git a/tools/testing/selftests/exec/Makefile b/tools/testing/selftests/exec/Makefile index cf69b2fcce59..dd61118df66e 100644 --- a/tools/testing/selftests/exec/Makefile +++ b/tools/testing/selftests/exec/Makefile @@ -28,8 +28,8 @@ $(OUTPUT)/execveat.denatured: $(OUTPUT)/execveat cp $< $@ chmod -x $@ $(OUTPUT)/load_address_4096: load_address.c - $(CC) $(CFLAGS) $(LDFLAGS) -Wl,-z,max-page-size=0x1000 -pie $< -o $@ + $(CC) $(CFLAGS) $(LDFLAGS) -Wl,-z,max-page-size=0x1000 -pie -static $< -o $@ $(OUTPUT)/load_address_2097152: load_address.c - $(CC) $(CFLAGS) $(LDFLAGS) -Wl,-z,max-page-size=0x200000 -pie $< -o $@ + $(CC) $(CFLAGS) $(LDFLAGS) -Wl,-z,max-page-size=0x200000 -pie -static $< -o $@ $(OUTPUT)/load_address_16777216: load_address.c - $(CC) $(CFLAGS) $(LDFLAGS) -Wl,-z,max-page-size=0x1000000 -pie $< -o $@ + $(CC) $(CFLAGS) $(LDFLAGS) -Wl,-z,max-page-size=0x1000000 -pie -static $< -o $@ diff --git a/tools/testing/selftests/kvm/.gitignore b/tools/testing/selftests/kvm/.gitignore index bd83158e0e0b..524c857a049c 100644 --- a/tools/testing/selftests/kvm/.gitignore +++ b/tools/testing/selftests/kvm/.gitignore @@ -41,5 +41,6 @@ /kvm_create_max_vcpus /kvm_page_table_test /memslot_modification_stress_test +/memslot_perf_test /set_memory_region_test /steal_time diff --git a/tools/testing/selftests/kvm/Makefile b/tools/testing/selftests/kvm/Makefile index e439d027939d..daaee1888b12 100644 --- a/tools/testing/selftests/kvm/Makefile +++ b/tools/testing/selftests/kvm/Makefile @@ -33,7 +33,7 @@ ifeq ($(ARCH),s390) UNAME_M := s390x endif -LIBKVM = lib/assert.c lib/elf.c lib/io.c lib/kvm_util.c lib/sparsebit.c lib/test_util.c lib/guest_modes.c lib/perf_test_util.c +LIBKVM = lib/assert.c lib/elf.c lib/io.c lib/kvm_util.c lib/rbtree.c lib/sparsebit.c lib/test_util.c lib/guest_modes.c lib/perf_test_util.c LIBKVM_x86_64 = lib/x86_64/processor.c lib/x86_64/vmx.c lib/x86_64/svm.c lib/x86_64/ucall.c lib/x86_64/handlers.S LIBKVM_aarch64 = lib/aarch64/processor.c lib/aarch64/ucall.c LIBKVM_s390x = lib/s390x/processor.c lib/s390x/ucall.c lib/s390x/diag318_test_handler.c @@ -74,6 +74,7 @@ TEST_GEN_PROGS_x86_64 += hardware_disable_test TEST_GEN_PROGS_x86_64 += kvm_create_max_vcpus TEST_GEN_PROGS_x86_64 += kvm_page_table_test TEST_GEN_PROGS_x86_64 += memslot_modification_stress_test +TEST_GEN_PROGS_x86_64 += memslot_perf_test TEST_GEN_PROGS_x86_64 += set_memory_region_test TEST_GEN_PROGS_x86_64 += steal_time diff --git a/tools/testing/selftests/kvm/demand_paging_test.c b/tools/testing/selftests/kvm/demand_paging_test.c index 5f7a229c3af1..b74704305835 100644 --- a/tools/testing/selftests/kvm/demand_paging_test.c +++ b/tools/testing/selftests/kvm/demand_paging_test.c @@ -9,6 +9,7 @@ #define _GNU_SOURCE /* for pipe2 */ +#include <inttypes.h> #include <stdio.h> #include <stdlib.h> #include <time.h> @@ -38,6 +39,7 @@ static int nr_vcpus = 1; static uint64_t guest_percpu_mem_size = DEFAULT_PER_VCPU_MEM_SIZE; +static size_t demand_paging_size; static char *guest_data_prototype; static void *vcpu_worker(void *data) @@ -71,36 +73,51 @@ static void *vcpu_worker(void *data) return NULL; } -static int handle_uffd_page_request(int uffd, uint64_t addr) +static int handle_uffd_page_request(int uffd_mode, int uffd, uint64_t addr) { - pid_t tid; + pid_t tid = syscall(__NR_gettid); struct timespec start; struct timespec ts_diff; - struct uffdio_copy copy; int r; - tid = syscall(__NR_gettid); + clock_gettime(CLOCK_MONOTONIC, &start); - copy.src = (uint64_t)guest_data_prototype; - copy.dst = addr; - copy.len = perf_test_args.host_page_size; - copy.mode = 0; + if (uffd_mode == UFFDIO_REGISTER_MODE_MISSING) { + struct uffdio_copy copy; - clock_gettime(CLOCK_MONOTONIC, &start); + copy.src = (uint64_t)guest_data_prototype; + copy.dst = addr; + copy.len = demand_paging_size; + copy.mode = 0; - r = ioctl(uffd, UFFDIO_COPY, ©); - if (r == -1) { - pr_info("Failed Paged in 0x%lx from thread %d with errno: %d\n", - addr, tid, errno); - return r; + r = ioctl(uffd, UFFDIO_COPY, ©); + if (r == -1) { + pr_info("Failed UFFDIO_COPY in 0x%lx from thread %d with errno: %d\n", + addr, tid, errno); + return r; + } + } else if (uffd_mode == UFFDIO_REGISTER_MODE_MINOR) { + struct uffdio_continue cont = {0}; + + cont.range.start = addr; + cont.range.len = demand_paging_size; + + r = ioctl(uffd, UFFDIO_CONTINUE, &cont); + if (r == -1) { + pr_info("Failed UFFDIO_CONTINUE in 0x%lx from thread %d with errno: %d\n", + addr, tid, errno); + return r; + } + } else { + TEST_FAIL("Invalid uffd mode %d", uffd_mode); } ts_diff = timespec_elapsed(start); - PER_PAGE_DEBUG("UFFDIO_COPY %d \t%ld ns\n", tid, + PER_PAGE_DEBUG("UFFD page-in %d \t%ld ns\n", tid, timespec_to_ns(ts_diff)); PER_PAGE_DEBUG("Paged in %ld bytes at 0x%lx from thread %d\n", - perf_test_args.host_page_size, addr, tid); + demand_paging_size, addr, tid); return 0; } @@ -108,6 +125,7 @@ static int handle_uffd_page_request(int uffd, uint64_t addr) bool quit_uffd_thread; struct uffd_handler_args { + int uffd_mode; int uffd; int pipefd; useconds_t delay; @@ -169,7 +187,7 @@ static void *uffd_handler_thread_fn(void *arg) if (r == -1) { if (errno == EAGAIN) continue; - pr_info("Read of uffd gor errno %d", errno); + pr_info("Read of uffd got errno %d\n", errno); return NULL; } @@ -184,7 +202,7 @@ static void *uffd_handler_thread_fn(void *arg) if (delay) usleep(delay); addr = msg.arg.pagefault.address; - r = handle_uffd_page_request(uffd, addr); + r = handle_uffd_page_request(uffd_args->uffd_mode, uffd, addr); if (r < 0) return NULL; pages++; @@ -198,43 +216,53 @@ static void *uffd_handler_thread_fn(void *arg) return NULL; } -static int setup_demand_paging(struct kvm_vm *vm, - pthread_t *uffd_handler_thread, int pipefd, - useconds_t uffd_delay, - struct uffd_handler_args *uffd_args, - void *hva, uint64_t len) +static void setup_demand_paging(struct kvm_vm *vm, + pthread_t *uffd_handler_thread, int pipefd, + int uffd_mode, useconds_t uffd_delay, + struct uffd_handler_args *uffd_args, + void *hva, void *alias, uint64_t len) { + bool is_minor = (uffd_mode == UFFDIO_REGISTER_MODE_MINOR); int uffd; struct uffdio_api uffdio_api; struct uffdio_register uffdio_register; + uint64_t expected_ioctls = ((uint64_t) 1) << _UFFDIO_COPY; - uffd = syscall(__NR_userfaultfd, O_CLOEXEC | O_NONBLOCK); - if (uffd == -1) { - pr_info("uffd creation failed\n"); - return -1; + PER_PAGE_DEBUG("Userfaultfd %s mode, faults resolved with %s\n", + is_minor ? "MINOR" : "MISSING", + is_minor ? "UFFDIO_CONINUE" : "UFFDIO_COPY"); + + /* In order to get minor faults, prefault via the alias. */ + if (is_minor) { + size_t p; + + expected_ioctls = ((uint64_t) 1) << _UFFDIO_CONTINUE; + + TEST_ASSERT(alias != NULL, "Alias required for minor faults"); + for (p = 0; p < (len / demand_paging_size); ++p) { + memcpy(alias + (p * demand_paging_size), + guest_data_prototype, demand_paging_size); + } } + uffd = syscall(__NR_userfaultfd, O_CLOEXEC | O_NONBLOCK); + TEST_ASSERT(uffd >= 0, "uffd creation failed, errno: %d", errno); + uffdio_api.api = UFFD_API; uffdio_api.features = 0; - if (ioctl(uffd, UFFDIO_API, &uffdio_api) == -1) { - pr_info("ioctl uffdio_api failed\n"); - return -1; - } + TEST_ASSERT(ioctl(uffd, UFFDIO_API, &uffdio_api) != -1, + "ioctl UFFDIO_API failed: %" PRIu64, + (uint64_t)uffdio_api.api); uffdio_register.range.start = (uint64_t)hva; uffdio_register.range.len = len; - uffdio_register.mode = UFFDIO_REGISTER_MODE_MISSING; - if (ioctl(uffd, UFFDIO_REGISTER, &uffdio_register) == -1) { - pr_info("ioctl uffdio_register failed\n"); - return -1; - } - - if ((uffdio_register.ioctls & UFFD_API_RANGE_IOCTLS) != - UFFD_API_RANGE_IOCTLS) { - pr_info("unexpected userfaultfd ioctl set\n"); - return -1; - } + uffdio_register.mode = uffd_mode; + TEST_ASSERT(ioctl(uffd, UFFDIO_REGISTER, &uffdio_register) != -1, + "ioctl UFFDIO_REGISTER failed"); + TEST_ASSERT((uffdio_register.ioctls & expected_ioctls) == + expected_ioctls, "missing userfaultfd ioctls"); + uffd_args->uffd_mode = uffd_mode; uffd_args->uffd = uffd; uffd_args->pipefd = pipefd; uffd_args->delay = uffd_delay; @@ -243,13 +271,12 @@ static int setup_demand_paging(struct kvm_vm *vm, PER_VCPU_DEBUG("Created uffd thread for HVA range [%p, %p)\n", hva, hva + len); - - return 0; } struct test_params { - bool use_uffd; + int uffd_mode; useconds_t uffd_delay; + enum vm_mem_backing_src_type src_type; bool partition_vcpu_memory_access; }; @@ -267,14 +294,16 @@ static void run_test(enum vm_guest_mode mode, void *arg) int r; vm = perf_test_create_vm(mode, nr_vcpus, guest_percpu_mem_size, - VM_MEM_SRC_ANONYMOUS); + p->src_type); perf_test_args.wr_fract = 1; - guest_data_prototype = malloc(perf_test_args.host_page_size); + demand_paging_size = get_backing_src_pagesz(p->src_type); + + guest_data_prototype = malloc(demand_paging_size); TEST_ASSERT(guest_data_prototype, "Failed to allocate buffer for guest data pattern"); - memset(guest_data_prototype, 0xAB, perf_test_args.host_page_size); + memset(guest_data_prototype, 0xAB, demand_paging_size); vcpu_threads = malloc(nr_vcpus * sizeof(*vcpu_threads)); TEST_ASSERT(vcpu_threads, "Memory allocation failed"); @@ -282,7 +311,7 @@ static void run_test(enum vm_guest_mode mode, void *arg) perf_test_setup_vcpus(vm, nr_vcpus, guest_percpu_mem_size, p->partition_vcpu_memory_access); - if (p->use_uffd) { + if (p->uffd_mode) { uffd_handler_threads = malloc(nr_vcpus * sizeof(*uffd_handler_threads)); TEST_ASSERT(uffd_handler_threads, "Memory allocation failed"); @@ -296,6 +325,7 @@ static void run_test(enum vm_guest_mode mode, void *arg) for (vcpu_id = 0; vcpu_id < nr_vcpus; vcpu_id++) { vm_paddr_t vcpu_gpa; void *vcpu_hva; + void *vcpu_alias; uint64_t vcpu_mem_size; @@ -310,8 +340,9 @@ static void run_test(enum vm_guest_mode mode, void *arg) PER_VCPU_DEBUG("Added VCPU %d with test mem gpa [%lx, %lx)\n", vcpu_id, vcpu_gpa, vcpu_gpa + vcpu_mem_size); - /* Cache the HVA pointer of the region */ + /* Cache the host addresses of the region */ vcpu_hva = addr_gpa2hva(vm, vcpu_gpa); + vcpu_alias = addr_gpa2alias(vm, vcpu_gpa); /* * Set up user fault fd to handle demand paging @@ -321,13 +352,11 @@ static void run_test(enum vm_guest_mode mode, void *arg) O_CLOEXEC | O_NONBLOCK); TEST_ASSERT(!r, "Failed to set up pipefd"); - r = setup_demand_paging(vm, - &uffd_handler_threads[vcpu_id], - pipefds[vcpu_id * 2], - p->uffd_delay, &uffd_args[vcpu_id], - vcpu_hva, vcpu_mem_size); - if (r < 0) - exit(-r); + setup_demand_paging(vm, &uffd_handler_threads[vcpu_id], + pipefds[vcpu_id * 2], p->uffd_mode, + p->uffd_delay, &uffd_args[vcpu_id], + vcpu_hva, vcpu_alias, + vcpu_mem_size); } } @@ -355,7 +384,7 @@ static void run_test(enum vm_guest_mode mode, void *arg) pr_info("All vCPU threads joined\n"); - if (p->use_uffd) { + if (p->uffd_mode) { char c; /* Tell the user fault fd handler threads to quit */ @@ -377,7 +406,7 @@ static void run_test(enum vm_guest_mode mode, void *arg) free(guest_data_prototype); free(vcpu_threads); - if (p->use_uffd) { + if (p->uffd_mode) { free(uffd_handler_threads); free(uffd_args); free(pipefds); @@ -387,17 +416,19 @@ static void run_test(enum vm_guest_mode mode, void *arg) static void help(char *name) { puts(""); - printf("usage: %s [-h] [-m mode] [-u] [-d uffd_delay_usec]\n" - " [-b memory] [-v vcpus] [-o]\n", name); + printf("usage: %s [-h] [-m vm_mode] [-u uffd_mode] [-d uffd_delay_usec]\n" + " [-b memory] [-t type] [-v vcpus] [-o]\n", name); guest_modes_help(); - printf(" -u: use User Fault FD to handle vCPU page\n" - " faults.\n"); + printf(" -u: use userfaultfd to handle vCPU page faults. Mode is a\n" + " UFFD registration mode: 'MISSING' or 'MINOR'.\n"); printf(" -d: add a delay in usec to the User Fault\n" " FD handler to simulate demand paging\n" " overheads. Ignored without -u.\n"); printf(" -b: specify the size of the memory region which should be\n" " demand paged by each vCPU. e.g. 10M or 3G.\n" " Default: 1G\n"); + printf(" -t: The type of backing memory to use. Default: anonymous\n"); + backing_src_help(); printf(" -v: specify the number of vCPUs to run.\n"); printf(" -o: Overlap guest memory accesses instead of partitioning\n" " them into a separate region of memory for each vCPU.\n"); @@ -409,19 +440,24 @@ int main(int argc, char *argv[]) { int max_vcpus = kvm_check_cap(KVM_CAP_MAX_VCPUS); struct test_params p = { + .src_type = VM_MEM_SRC_ANONYMOUS, .partition_vcpu_memory_access = true, }; int opt; guest_modes_append_default(); - while ((opt = getopt(argc, argv, "hm:ud:b:v:o")) != -1) { + while ((opt = getopt(argc, argv, "hm:u:d:b:t:v:o")) != -1) { switch (opt) { case 'm': guest_modes_cmdline(optarg); break; case 'u': - p.use_uffd = true; + if (!strcmp("MISSING", optarg)) + p.uffd_mode = UFFDIO_REGISTER_MODE_MISSING; + else if (!strcmp("MINOR", optarg)) + p.uffd_mode = UFFDIO_REGISTER_MODE_MINOR; + TEST_ASSERT(p.uffd_mode, "UFFD mode must be 'MISSING' or 'MINOR'."); break; case 'd': p.uffd_delay = strtoul(optarg, NULL, 0); @@ -430,6 +466,9 @@ int main(int argc, char *argv[]) case 'b': guest_percpu_mem_size = parse_size(optarg); break; + case 't': + p.src_type = parse_backing_src_type(optarg); + break; case 'v': nr_vcpus = atoi(optarg); TEST_ASSERT(nr_vcpus > 0 && nr_vcpus <= max_vcpus, @@ -445,6 +484,11 @@ int main(int argc, char *argv[]) } } + if (p.uffd_mode == UFFDIO_REGISTER_MODE_MINOR && + !backing_src_is_shared(p.src_type)) { + TEST_FAIL("userfaultfd MINOR mode requires shared memory; pick a different -t"); + } + for_each_guest_mode(run_test, &p); return 0; diff --git a/tools/testing/selftests/kvm/hardware_disable_test.c b/tools/testing/selftests/kvm/hardware_disable_test.c index 5aadf84c91c0..4b8db3bce610 100644 --- a/tools/testing/selftests/kvm/hardware_disable_test.c +++ b/tools/testing/selftests/kvm/hardware_disable_test.c @@ -132,6 +132,36 @@ static void run_test(uint32_t run) TEST_ASSERT(false, "%s: [%d] child escaped the ninja\n", __func__, run); } +void wait_for_child_setup(pid_t pid) +{ + /* + * Wait for the child to post to the semaphore, but wake up periodically + * to check if the child exited prematurely. + */ + for (;;) { + const struct timespec wait_period = { .tv_sec = 1 }; + int status; + + if (!sem_timedwait(sem, &wait_period)) + return; + + /* Child is still running, keep waiting. */ + if (pid != waitpid(pid, &status, WNOHANG)) + continue; + + /* + * Child is no longer running, which is not expected. + * + * If it exited with a non-zero status, we explicitly forward + * the child's status in case it exited with KSFT_SKIP. + */ + if (WIFEXITED(status)) + exit(WEXITSTATUS(status)); + else + TEST_ASSERT(false, "Child exited unexpectedly"); + } +} + int main(int argc, char **argv) { uint32_t i; @@ -148,7 +178,7 @@ int main(int argc, char **argv) run_test(i); /* This function always exits */ pr_debug("%s: [%d] waiting semaphore\n", __func__, i); - sem_wait(sem); + wait_for_child_setup(pid); r = (rand() % DELAY_US_MAX) + 1; pr_debug("%s: [%d] waiting %dus\n", __func__, i, r); usleep(r); diff --git a/tools/testing/selftests/kvm/include/kvm_util.h b/tools/testing/selftests/kvm/include/kvm_util.h index a8f022794ce3..35739567189e 100644 --- a/tools/testing/selftests/kvm/include/kvm_util.h +++ b/tools/testing/selftests/kvm/include/kvm_util.h @@ -43,6 +43,7 @@ enum vm_guest_mode { VM_MODE_P40V48_4K, VM_MODE_P40V48_64K, VM_MODE_PXXV48_4K, /* For 48bits VA but ANY bits PA */ + VM_MODE_P47V64_4K, NUM_VM_MODES, }; @@ -60,7 +61,7 @@ enum vm_guest_mode { #elif defined(__s390x__) -#define VM_MODE_DEFAULT VM_MODE_P52V48_4K +#define VM_MODE_DEFAULT VM_MODE_P47V64_4K #define MIN_PAGE_SHIFT 12U #define ptes_per_page(page_size) ((page_size) / 16) @@ -77,6 +78,7 @@ struct vm_guest_mode_params { }; extern const struct vm_guest_mode_params vm_guest_mode_params[]; +int open_kvm_dev_path_or_exit(void); int kvm_check_cap(long cap); int vm_enable_cap(struct kvm_vm *vm, struct kvm_enable_cap *cap); int vcpu_enable_cap(struct kvm_vm *vm, uint32_t vcpu_id, @@ -146,6 +148,7 @@ void virt_map(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr, void *addr_gpa2hva(struct kvm_vm *vm, vm_paddr_t gpa); void *addr_gva2hva(struct kvm_vm *vm, vm_vaddr_t gva); vm_paddr_t addr_hva2gpa(struct kvm_vm *vm, void *hva); +void *addr_gpa2alias(struct kvm_vm *vm, vm_paddr_t gpa); /* * Address Guest Virtual to Guest Physical @@ -283,10 +286,11 @@ struct kvm_vm *vm_create_default_with_vcpus(uint32_t nr_vcpus, uint64_t extra_me uint32_t num_percpu_pages, void *guest_code, uint32_t vcpuids[]); -/* Like vm_create_default_with_vcpus, but accepts mode as a parameter */ +/* Like vm_create_default_with_vcpus, but accepts mode and slot0 memory as a parameter */ struct kvm_vm *vm_create_with_vcpus(enum vm_guest_mode mode, uint32_t nr_vcpus, - uint64_t extra_mem_pages, uint32_t num_percpu_pages, - void *guest_code, uint32_t vcpuids[]); + uint64_t slot0_mem_pages, uint64_t extra_mem_pages, + uint32_t num_percpu_pages, void *guest_code, + uint32_t vcpuids[]); /* * Adds a vCPU with reasonable defaults (e.g. a stack) @@ -302,7 +306,7 @@ bool vm_is_unrestricted_guest(struct kvm_vm *vm); unsigned int vm_get_page_size(struct kvm_vm *vm); unsigned int vm_get_page_shift(struct kvm_vm *vm); -unsigned int vm_get_max_gfn(struct kvm_vm *vm); +uint64_t vm_get_max_gfn(struct kvm_vm *vm); int vm_get_fd(struct kvm_vm *vm); unsigned int vm_calc_num_guest_pages(enum vm_guest_mode mode, size_t size); diff --git a/tools/testing/selftests/kvm/include/test_util.h b/tools/testing/selftests/kvm/include/test_util.h index fade3130eb01..d79be15dd3d2 100644 --- a/tools/testing/selftests/kvm/include/test_util.h +++ b/tools/testing/selftests/kvm/include/test_util.h @@ -17,6 +17,7 @@ #include <errno.h> #include <unistd.h> #include <fcntl.h> +#include <sys/mman.h> #include "kselftest.h" static inline int _no_printf(const char *format, ...) { return 0; } @@ -84,6 +85,8 @@ enum vm_mem_backing_src_type { VM_MEM_SRC_ANONYMOUS_HUGETLB_1GB, VM_MEM_SRC_ANONYMOUS_HUGETLB_2GB, VM_MEM_SRC_ANONYMOUS_HUGETLB_16GB, + VM_MEM_SRC_SHMEM, + VM_MEM_SRC_SHARED_HUGETLB, NUM_SRC_TYPES, }; @@ -100,4 +103,13 @@ size_t get_backing_src_pagesz(uint32_t i); void backing_src_help(void); enum vm_mem_backing_src_type parse_backing_src_type(const char *type_name); +/* + * Whether or not the given source type is shared memory (as opposed to + * anonymous). + */ +static inline bool backing_src_is_shared(enum vm_mem_backing_src_type t) +{ + return vm_mem_backing_src_alias(t)->flag & MAP_SHARED; +} + #endif /* SELFTEST_KVM_TEST_UTIL_H */ diff --git a/tools/testing/selftests/kvm/kvm_page_table_test.c b/tools/testing/selftests/kvm/kvm_page_table_test.c index 1c4753fff19e..82171f17c1d7 100644 --- a/tools/testing/selftests/kvm/kvm_page_table_test.c +++ b/tools/testing/selftests/kvm/kvm_page_table_test.c @@ -268,7 +268,7 @@ static struct kvm_vm *pre_init_before_test(enum vm_guest_mode mode, void *arg) /* Create a VM with enough guest pages */ guest_num_pages = test_mem_size / guest_page_size; - vm = vm_create_with_vcpus(mode, nr_vcpus, + vm = vm_create_with_vcpus(mode, nr_vcpus, DEFAULT_GUEST_PHY_PAGES, guest_num_pages, 0, guest_code, NULL); /* Align down GPA of the testing memslot */ diff --git a/tools/testing/selftests/kvm/lib/kvm_util.c b/tools/testing/selftests/kvm/lib/kvm_util.c index fc83f6c5902d..a2b732cf96ea 100644 --- a/tools/testing/selftests/kvm/lib/kvm_util.c +++ b/tools/testing/selftests/kvm/lib/kvm_util.c @@ -32,6 +32,34 @@ static void *align(void *x, size_t size) } /* + * Open KVM_DEV_PATH if available, otherwise exit the entire program. + * + * Input Args: + * flags - The flags to pass when opening KVM_DEV_PATH. + * + * Return: + * The opened file descriptor of /dev/kvm. + */ +static int _open_kvm_dev_path_or_exit(int flags) +{ + int fd; + + fd = open(KVM_DEV_PATH, flags); + if (fd < 0) { + print_skip("%s not available, is KVM loaded? (errno: %d)", + KVM_DEV_PATH, errno); + exit(KSFT_SKIP); + } + + return fd; +} + +int open_kvm_dev_path_or_exit(void) +{ + return _open_kvm_dev_path_or_exit(O_RDONLY); +} + +/* * Capability * * Input Args: @@ -52,12 +80,9 @@ int kvm_check_cap(long cap) int ret; int kvm_fd; - kvm_fd = open(KVM_DEV_PATH, O_RDONLY); - if (kvm_fd < 0) - exit(KSFT_SKIP); - + kvm_fd = open_kvm_dev_path_or_exit(); ret = ioctl(kvm_fd, KVM_CHECK_EXTENSION, cap); - TEST_ASSERT(ret != -1, "KVM_CHECK_EXTENSION IOCTL failed,\n" + TEST_ASSERT(ret >= 0, "KVM_CHECK_EXTENSION IOCTL failed,\n" " rc: %i errno: %i", ret, errno); close(kvm_fd); @@ -128,9 +153,7 @@ void vm_enable_dirty_ring(struct kvm_vm *vm, uint32_t ring_size) static void vm_open(struct kvm_vm *vm, int perm) { - vm->kvm_fd = open(KVM_DEV_PATH, perm); - if (vm->kvm_fd < 0) - exit(KSFT_SKIP); + vm->kvm_fd = _open_kvm_dev_path_or_exit(perm); if (!kvm_check_cap(KVM_CAP_IMMEDIATE_EXIT)) { print_skip("immediate_exit not available"); @@ -152,6 +175,7 @@ const char *vm_guest_mode_string(uint32_t i) [VM_MODE_P40V48_4K] = "PA-bits:40, VA-bits:48, 4K pages", [VM_MODE_P40V48_64K] = "PA-bits:40, VA-bits:48, 64K pages", [VM_MODE_PXXV48_4K] = "PA-bits:ANY, VA-bits:48, 4K pages", + [VM_MODE_P47V64_4K] = "PA-bits:47, VA-bits:64, 4K pages", }; _Static_assert(sizeof(strings)/sizeof(char *) == NUM_VM_MODES, "Missing new mode strings?"); @@ -169,6 +193,7 @@ const struct vm_guest_mode_params vm_guest_mode_params[] = { { 40, 48, 0x1000, 12 }, { 40, 48, 0x10000, 16 }, { 0, 0, 0x1000, 12 }, + { 47, 64, 0x1000, 12 }, }; _Static_assert(sizeof(vm_guest_mode_params)/sizeof(struct vm_guest_mode_params) == NUM_VM_MODES, "Missing new mode params?"); @@ -203,7 +228,9 @@ struct kvm_vm *vm_create(enum vm_guest_mode mode, uint64_t phy_pages, int perm) TEST_ASSERT(vm != NULL, "Insufficient Memory"); INIT_LIST_HEAD(&vm->vcpus); - INIT_LIST_HEAD(&vm->userspace_mem_regions); + vm->regions.gpa_tree = RB_ROOT; + vm->regions.hva_tree = RB_ROOT; + hash_init(vm->regions.slot_hash); vm->mode = mode; vm->type = 0; @@ -252,6 +279,9 @@ struct kvm_vm *vm_create(enum vm_guest_mode mode, uint64_t phy_pages, int perm) TEST_FAIL("VM_MODE_PXXV48_4K not supported on non-x86 platforms"); #endif break; + case VM_MODE_P47V64_4K: + vm->pgtable_levels = 5; + break; default: TEST_FAIL("Unknown guest mode, mode: 0x%x", mode); } @@ -283,21 +313,50 @@ struct kvm_vm *vm_create(enum vm_guest_mode mode, uint64_t phy_pages, int perm) return vm; } +/* + * VM Create with customized parameters + * + * Input Args: + * mode - VM Mode (e.g. VM_MODE_P52V48_4K) + * nr_vcpus - VCPU count + * slot0_mem_pages - Slot0 physical memory size + * extra_mem_pages - Non-slot0 physical memory total size + * num_percpu_pages - Per-cpu physical memory pages + * guest_code - Guest entry point + * vcpuids - VCPU IDs + * + * Output Args: None + * + * Return: + * Pointer to opaque structure that describes the created VM. + * + * Creates a VM with the mode specified by mode (e.g. VM_MODE_P52V48_4K), + * with customized slot0 memory size, at least 512 pages currently. + * extra_mem_pages is only used to calculate the maximum page table size, + * no real memory allocation for non-slot0 memory in this function. + */ struct kvm_vm *vm_create_with_vcpus(enum vm_guest_mode mode, uint32_t nr_vcpus, - uint64_t extra_mem_pages, uint32_t num_percpu_pages, - void *guest_code, uint32_t vcpuids[]) + uint64_t slot0_mem_pages, uint64_t extra_mem_pages, + uint32_t num_percpu_pages, void *guest_code, + uint32_t vcpuids[]) { + uint64_t vcpu_pages, extra_pg_pages, pages; + struct kvm_vm *vm; + int i; + + /* Force slot0 memory size not small than DEFAULT_GUEST_PHY_PAGES */ + if (slot0_mem_pages < DEFAULT_GUEST_PHY_PAGES) + slot0_mem_pages = DEFAULT_GUEST_PHY_PAGES; + /* The maximum page table size for a memory region will be when the * smallest pages are used. Considering each page contains x page * table descriptors, the total extra size for page tables (for extra * N pages) will be: N/x+N/x^2+N/x^3+... which is definitely smaller * than N/x*2. */ - uint64_t vcpu_pages = (DEFAULT_STACK_PGS + num_percpu_pages) * nr_vcpus; - uint64_t extra_pg_pages = (extra_mem_pages + vcpu_pages) / PTES_PER_MIN_PAGE * 2; - uint64_t pages = DEFAULT_GUEST_PHY_PAGES + vcpu_pages + extra_pg_pages; - struct kvm_vm *vm; - int i; + vcpu_pages = (DEFAULT_STACK_PGS + num_percpu_pages) * nr_vcpus; + extra_pg_pages = (slot0_mem_pages + extra_mem_pages + vcpu_pages) / PTES_PER_MIN_PAGE * 2; + pages = slot0_mem_pages + vcpu_pages + extra_pg_pages; TEST_ASSERT(nr_vcpus <= kvm_check_cap(KVM_CAP_MAX_VCPUS), "nr_vcpus = %d too large for host, max-vcpus = %d", @@ -329,8 +388,8 @@ struct kvm_vm *vm_create_default_with_vcpus(uint32_t nr_vcpus, uint64_t extra_me uint32_t num_percpu_pages, void *guest_code, uint32_t vcpuids[]) { - return vm_create_with_vcpus(VM_MODE_DEFAULT, nr_vcpus, extra_mem_pages, - num_percpu_pages, guest_code, vcpuids); + return vm_create_with_vcpus(VM_MODE_DEFAULT, nr_vcpus, DEFAULT_GUEST_PHY_PAGES, + extra_mem_pages, num_percpu_pages, guest_code, vcpuids); } struct kvm_vm *vm_create_default(uint32_t vcpuid, uint64_t extra_mem_pages, @@ -355,13 +414,14 @@ struct kvm_vm *vm_create_default(uint32_t vcpuid, uint64_t extra_mem_pages, */ void kvm_vm_restart(struct kvm_vm *vmp, int perm) { + int ctr; struct userspace_mem_region *region; vm_open(vmp, perm); if (vmp->has_irqchip) vm_create_irqchip(vmp); - list_for_each_entry(region, &vmp->userspace_mem_regions, list) { + hash_for_each(vmp->regions.slot_hash, ctr, region, slot_node) { int ret = ioctl(vmp->fd, KVM_SET_USER_MEMORY_REGION, ®ion->region); TEST_ASSERT(ret == 0, "KVM_SET_USER_MEMORY_REGION IOCTL failed,\n" " rc: %i errno: %i\n" @@ -424,14 +484,21 @@ uint32_t kvm_vm_reset_dirty_ring(struct kvm_vm *vm) static struct userspace_mem_region * userspace_mem_region_find(struct kvm_vm *vm, uint64_t start, uint64_t end) { - struct userspace_mem_region *region; + struct rb_node *node; - list_for_each_entry(region, &vm->userspace_mem_regions, list) { + for (node = vm->regions.gpa_tree.rb_node; node; ) { + struct userspace_mem_region *region = + container_of(node, struct userspace_mem_region, gpa_node); uint64_t existing_start = region->region.guest_phys_addr; uint64_t existing_end = region->region.guest_phys_addr + region->region.memory_size - 1; if (start <= existing_end && end >= existing_start) return region; + + if (start < existing_start) + node = node->rb_left; + else + node = node->rb_right; } return NULL; @@ -546,11 +613,16 @@ void kvm_vm_release(struct kvm_vm *vmp) } static void __vm_mem_region_delete(struct kvm_vm *vm, - struct userspace_mem_region *region) + struct userspace_mem_region *region, + bool unlink) { int ret; - list_del(®ion->list); + if (unlink) { + rb_erase(®ion->gpa_node, &vm->regions.gpa_tree); + rb_erase(®ion->hva_node, &vm->regions.hva_tree); + hash_del(®ion->slot_node); + } region->region.memory_size = 0; ret = ioctl(vm->fd, KVM_SET_USER_MEMORY_REGION, ®ion->region); @@ -569,14 +641,16 @@ static void __vm_mem_region_delete(struct kvm_vm *vm, */ void kvm_vm_free(struct kvm_vm *vmp) { - struct userspace_mem_region *region, *tmp; + int ctr; + struct hlist_node *node; + struct userspace_mem_region *region; if (vmp == NULL) return; /* Free userspace_mem_regions. */ - list_for_each_entry_safe(region, tmp, &vmp->userspace_mem_regions, list) - __vm_mem_region_delete(vmp, region); + hash_for_each_safe(vmp->regions.slot_hash, ctr, node, region, slot_node) + __vm_mem_region_delete(vmp, region, false); /* Free sparsebit arrays. */ sparsebit_free(&vmp->vpages_valid); @@ -658,13 +732,64 @@ int kvm_memcmp_hva_gva(void *hva, struct kvm_vm *vm, vm_vaddr_t gva, size_t len) return 0; } +static void vm_userspace_mem_region_gpa_insert(struct rb_root *gpa_tree, + struct userspace_mem_region *region) +{ + struct rb_node **cur, *parent; + + for (cur = &gpa_tree->rb_node, parent = NULL; *cur; ) { + struct userspace_mem_region *cregion; + + cregion = container_of(*cur, typeof(*cregion), gpa_node); + parent = *cur; + if (region->region.guest_phys_addr < + cregion->region.guest_phys_addr) + cur = &(*cur)->rb_left; + else { + TEST_ASSERT(region->region.guest_phys_addr != + cregion->region.guest_phys_addr, + "Duplicate GPA in region tree"); + + cur = &(*cur)->rb_right; + } + } + + rb_link_node(®ion->gpa_node, parent, cur); + rb_insert_color(®ion->gpa_node, gpa_tree); +} + +static void vm_userspace_mem_region_hva_insert(struct rb_root *hva_tree, + struct userspace_mem_region *region) +{ + struct rb_node **cur, *parent; + + for (cur = &hva_tree->rb_node, parent = NULL; *cur; ) { + struct userspace_mem_region *cregion; + + cregion = container_of(*cur, typeof(*cregion), hva_node); + parent = *cur; + if (region->host_mem < cregion->host_mem) + cur = &(*cur)->rb_left; + else { + TEST_ASSERT(region->host_mem != + cregion->host_mem, + "Duplicate HVA in region tree"); + + cur = &(*cur)->rb_right; + } + } + + rb_link_node(®ion->hva_node, parent, cur); + rb_insert_color(®ion->hva_node, hva_tree); +} + /* * VM Userspace Memory Region Add * * Input Args: * vm - Virtual Machine - * backing_src - Storage source for this region. - * NULL to use anonymous memory. + * src_type - Storage source for this region. + * NULL to use anonymous memory. * guest_paddr - Starting guest physical address * slot - KVM region slot * npages - Number of physical pages @@ -722,7 +847,8 @@ void vm_userspace_mem_region_add(struct kvm_vm *vm, (uint64_t) region->region.memory_size); /* Confirm no region with the requested slot already exists. */ - list_for_each_entry(region, &vm->userspace_mem_regions, list) { + hash_for_each_possible(vm->regions.slot_hash, region, slot_node, + slot) { if (region->region.slot != slot) continue; @@ -755,11 +881,30 @@ void vm_userspace_mem_region_add(struct kvm_vm *vm, if (alignment > 1) region->mmap_size += alignment; + region->fd = -1; + if (backing_src_is_shared(src_type)) { + int memfd_flags = MFD_CLOEXEC; + + if (src_type == VM_MEM_SRC_SHARED_HUGETLB) + memfd_flags |= MFD_HUGETLB; + + region->fd = memfd_create("kvm_selftest", memfd_flags); + TEST_ASSERT(region->fd != -1, + "memfd_create failed, errno: %i", errno); + + ret = ftruncate(region->fd, region->mmap_size); + TEST_ASSERT(ret == 0, "ftruncate failed, errno: %i", errno); + + ret = fallocate(region->fd, + FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE, 0, + region->mmap_size); + TEST_ASSERT(ret == 0, "fallocate failed, errno: %i", errno); + } + region->mmap_start = mmap(NULL, region->mmap_size, PROT_READ | PROT_WRITE, - MAP_PRIVATE | MAP_ANONYMOUS - | vm_mem_backing_src_alias(src_type)->flag, - -1, 0); + vm_mem_backing_src_alias(src_type)->flag, + region->fd, 0); TEST_ASSERT(region->mmap_start != MAP_FAILED, "test_malloc failed, mmap_start: %p errno: %i", region->mmap_start, errno); @@ -793,8 +938,23 @@ void vm_userspace_mem_region_add(struct kvm_vm *vm, ret, errno, slot, flags, guest_paddr, (uint64_t) region->region.memory_size); - /* Add to linked-list of memory regions. */ - list_add(®ion->list, &vm->userspace_mem_regions); + /* Add to quick lookup data structures */ + vm_userspace_mem_region_gpa_insert(&vm->regions.gpa_tree, region); + vm_userspace_mem_region_hva_insert(&vm->regions.hva_tree, region); + hash_add(vm->regions.slot_hash, ®ion->slot_node, slot); + + /* If shared memory, create an alias. */ + if (region->fd >= 0) { + region->mmap_alias = mmap(NULL, region->mmap_size, + PROT_READ | PROT_WRITE, + vm_mem_backing_src_alias(src_type)->flag, + region->fd, 0); + TEST_ASSERT(region->mmap_alias != MAP_FAILED, + "mmap of alias failed, errno: %i", errno); + + /* Align host alias address */ + region->host_alias = align(region->mmap_alias, alignment); + } } /* @@ -817,10 +977,10 @@ memslot2region(struct kvm_vm *vm, uint32_t memslot) { struct userspace_mem_region *region; - list_for_each_entry(region, &vm->userspace_mem_regions, list) { + hash_for_each_possible(vm->regions.slot_hash, region, slot_node, + memslot) if (region->region.slot == memslot) return region; - } fprintf(stderr, "No mem region with the requested slot found,\n" " requested slot: %u\n", memslot); @@ -905,7 +1065,7 @@ void vm_mem_region_move(struct kvm_vm *vm, uint32_t slot, uint64_t new_gpa) */ void vm_mem_region_delete(struct kvm_vm *vm, uint32_t slot) { - __vm_mem_region_delete(vm, memslot2region(vm, slot)); + __vm_mem_region_delete(vm, memslot2region(vm, slot), true); } /* @@ -925,9 +1085,7 @@ static int vcpu_mmap_sz(void) { int dev_fd, ret; - dev_fd = open(KVM_DEV_PATH, O_RDONLY); - if (dev_fd < 0) - exit(KSFT_SKIP); + dev_fd = open_kvm_dev_path_or_exit(); ret = ioctl(dev_fd, KVM_GET_VCPU_MMAP_SIZE, NULL); TEST_ASSERT(ret >= sizeof(struct kvm_run), @@ -1099,6 +1257,9 @@ vm_vaddr_t vm_vaddr_alloc(struct kvm_vm *vm, size_t sz, vm_vaddr_t vaddr_min, uint64_t pages = (sz >> vm->page_shift) + ((sz % vm->page_size) != 0); virt_pgd_alloc(vm, pgd_memslot); + vm_paddr_t paddr = vm_phy_pages_alloc(vm, pages, + KVM_UTIL_MIN_PFN * vm->page_size, + data_memslot); /* * Find an unused range of virtual page addresses of at least @@ -1108,11 +1269,7 @@ vm_vaddr_t vm_vaddr_alloc(struct kvm_vm *vm, size_t sz, vm_vaddr_t vaddr_min, /* Map the virtual pages. */ for (vm_vaddr_t vaddr = vaddr_start; pages > 0; - pages--, vaddr += vm->page_size) { - vm_paddr_t paddr; - - paddr = vm_phy_page_alloc(vm, - KVM_UTIL_MIN_PFN * vm->page_size, data_memslot); + pages--, vaddr += vm->page_size, paddr += vm->page_size) { virt_pg_map(vm, vaddr, paddr, pgd_memslot); @@ -1177,16 +1334,14 @@ void *addr_gpa2hva(struct kvm_vm *vm, vm_paddr_t gpa) { struct userspace_mem_region *region; - list_for_each_entry(region, &vm->userspace_mem_regions, list) { - if ((gpa >= region->region.guest_phys_addr) - && (gpa <= (region->region.guest_phys_addr - + region->region.memory_size - 1))) - return (void *) ((uintptr_t) region->host_mem - + (gpa - region->region.guest_phys_addr)); + region = userspace_mem_region_find(vm, gpa, gpa); + if (!region) { + TEST_FAIL("No vm physical memory at 0x%lx", gpa); + return NULL; } - TEST_FAIL("No vm physical memory at 0x%lx", gpa); - return NULL; + return (void *)((uintptr_t)region->host_mem + + (gpa - region->region.guest_phys_addr)); } /* @@ -1208,15 +1363,22 @@ void *addr_gpa2hva(struct kvm_vm *vm, vm_paddr_t gpa) */ vm_paddr_t addr_hva2gpa(struct kvm_vm *vm, void *hva) { - struct userspace_mem_region *region; + struct rb_node *node; - list_for_each_entry(region, &vm->userspace_mem_regions, list) { - if ((hva >= region->host_mem) - && (hva <= (region->host_mem - + region->region.memory_size - 1))) - return (vm_paddr_t) ((uintptr_t) - region->region.guest_phys_addr - + (hva - (uintptr_t) region->host_mem)); + for (node = vm->regions.hva_tree.rb_node; node; ) { + struct userspace_mem_region *region = + container_of(node, struct userspace_mem_region, hva_node); + + if (hva >= region->host_mem) { + if (hva <= (region->host_mem + + region->region.memory_size - 1)) + return (vm_paddr_t)((uintptr_t) + region->region.guest_phys_addr + + (hva - (uintptr_t)region->host_mem)); + + node = node->rb_right; + } else + node = node->rb_left; } TEST_FAIL("No mapping to a guest physical address, hva: %p", hva); @@ -1224,6 +1386,42 @@ vm_paddr_t addr_hva2gpa(struct kvm_vm *vm, void *hva) } /* + * Address VM physical to Host Virtual *alias*. + * + * Input Args: + * vm - Virtual Machine + * gpa - VM physical address + * + * Output Args: None + * + * Return: + * Equivalent address within the host virtual *alias* area, or NULL + * (without failing the test) if the guest memory is not shared (so + * no alias exists). + * + * When vm_create() and related functions are called with a shared memory + * src_type, we also create a writable, shared alias mapping of the + * underlying guest memory. This allows the host to manipulate guest memory + * without mapping that memory in the guest's address space. And, for + * userfaultfd-based demand paging, we can do so without triggering userfaults. + */ +void *addr_gpa2alias(struct kvm_vm *vm, vm_paddr_t gpa) +{ + struct userspace_mem_region *region; + uintptr_t offset; + + region = userspace_mem_region_find(vm, gpa, gpa); + if (!region) + return NULL; + + if (!region->host_alias) + return NULL; + + offset = gpa - region->region.guest_phys_addr; + return (void *) ((uintptr_t) region->host_alias + offset); +} + +/* * VM Create IRQ Chip * * Input Args: @@ -1822,6 +2020,7 @@ int kvm_device_access(int dev_fd, uint32_t group, uint64_t attr, */ void vm_dump(FILE *stream, struct kvm_vm *vm, uint8_t indent) { + int ctr; struct userspace_mem_region *region; struct vcpu *vcpu; @@ -1829,7 +2028,7 @@ void vm_dump(FILE *stream, struct kvm_vm *vm, uint8_t indent) fprintf(stream, "%*sfd: %i\n", indent, "", vm->fd); fprintf(stream, "%*spage_size: 0x%x\n", indent, "", vm->page_size); fprintf(stream, "%*sMem Regions:\n", indent, ""); - list_for_each_entry(region, &vm->userspace_mem_regions, list) { + hash_for_each(vm->regions.slot_hash, ctr, region, slot_node) { fprintf(stream, "%*sguest_phys: 0x%lx size: 0x%lx " "host_virt: %p\n", indent + 2, "", (uint64_t) region->region.guest_phys_addr, @@ -2015,10 +2214,7 @@ bool vm_is_unrestricted_guest(struct kvm_vm *vm) if (vm == NULL) { /* Ensure that the KVM vendor-specific module is loaded. */ - f = fopen(KVM_DEV_PATH, "r"); - TEST_ASSERT(f != NULL, "Error in opening KVM dev file: %d", - errno); - fclose(f); + close(open_kvm_dev_path_or_exit()); } f = fopen("/sys/module/kvm_intel/parameters/unrestricted_guest", "r"); @@ -2041,7 +2237,7 @@ unsigned int vm_get_page_shift(struct kvm_vm *vm) return vm->page_shift; } -unsigned int vm_get_max_gfn(struct kvm_vm *vm) +uint64_t vm_get_max_gfn(struct kvm_vm *vm) { return vm->max_gfn; } diff --git a/tools/testing/selftests/kvm/lib/kvm_util_internal.h b/tools/testing/selftests/kvm/lib/kvm_util_internal.h index 91ce1b5d480b..a03febc24ba6 100644 --- a/tools/testing/selftests/kvm/lib/kvm_util_internal.h +++ b/tools/testing/selftests/kvm/lib/kvm_util_internal.h @@ -8,6 +8,9 @@ #ifndef SELFTEST_KVM_UTIL_INTERNAL_H #define SELFTEST_KVM_UTIL_INTERNAL_H +#include "linux/hashtable.h" +#include "linux/rbtree.h" + #include "sparsebit.h" struct userspace_mem_region { @@ -16,9 +19,13 @@ struct userspace_mem_region { int fd; off_t offset; void *host_mem; + void *host_alias; void *mmap_start; + void *mmap_alias; size_t mmap_size; - struct list_head list; + struct rb_node gpa_node; + struct rb_node hva_node; + struct hlist_node slot_node; }; struct vcpu { @@ -31,6 +38,12 @@ struct vcpu { uint32_t dirty_gfns_count; }; +struct userspace_mem_regions { + struct rb_root gpa_tree; + struct rb_root hva_tree; + DECLARE_HASHTABLE(slot_hash, 9); +}; + struct kvm_vm { int mode; unsigned long type; @@ -43,7 +56,7 @@ struct kvm_vm { unsigned int va_bits; uint64_t max_gfn; struct list_head vcpus; - struct list_head userspace_mem_regions; + struct userspace_mem_regions regions; struct sparsebit *vpages_valid; struct sparsebit *vpages_mapped; bool has_irqchip; diff --git a/tools/testing/selftests/kvm/lib/perf_test_util.c b/tools/testing/selftests/kvm/lib/perf_test_util.c index 81490b9b4e32..7397ca299835 100644 --- a/tools/testing/selftests/kvm/lib/perf_test_util.c +++ b/tools/testing/selftests/kvm/lib/perf_test_util.c @@ -2,6 +2,7 @@ /* * Copyright (C) 2020, Google LLC. */ +#include <inttypes.h> #include "kvm_util.h" #include "perf_test_util.h" @@ -68,7 +69,7 @@ struct kvm_vm *perf_test_create_vm(enum vm_guest_mode mode, int vcpus, TEST_ASSERT(vcpu_memory_bytes % perf_test_args.guest_page_size == 0, "Guest memory size is not guest page size aligned."); - vm = vm_create_with_vcpus(mode, vcpus, + vm = vm_create_with_vcpus(mode, vcpus, DEFAULT_GUEST_PHY_PAGES, (vcpus * vcpu_memory_bytes) / perf_test_args.guest_page_size, 0, guest_code, NULL); @@ -80,7 +81,8 @@ struct kvm_vm *perf_test_create_vm(enum vm_guest_mode mode, int vcpus, */ TEST_ASSERT(guest_num_pages < vm_get_max_gfn(vm), "Requested more guest memory than address space allows.\n" - " guest pages: %lx max gfn: %x vcpus: %d wss: %lx]\n", + " guest pages: %" PRIx64 " max gfn: %" PRIx64 + " vcpus: %d wss: %" PRIx64 "]\n", guest_num_pages, vm_get_max_gfn(vm), vcpus, vcpu_memory_bytes); diff --git a/tools/testing/selftests/kvm/lib/rbtree.c b/tools/testing/selftests/kvm/lib/rbtree.c new file mode 100644 index 000000000000..a703f0194ea3 --- /dev/null +++ b/tools/testing/selftests/kvm/lib/rbtree.c @@ -0,0 +1 @@ +#include "../../../../lib/rbtree.c" diff --git a/tools/testing/selftests/kvm/lib/test_util.c b/tools/testing/selftests/kvm/lib/test_util.c index 63d2bc7d757b..af1031fed97f 100644 --- a/tools/testing/selftests/kvm/lib/test_util.c +++ b/tools/testing/selftests/kvm/lib/test_util.c @@ -166,72 +166,89 @@ size_t get_def_hugetlb_pagesz(void) return 0; } +#define ANON_FLAGS (MAP_PRIVATE | MAP_ANONYMOUS) +#define ANON_HUGE_FLAGS (ANON_FLAGS | MAP_HUGETLB) + const struct vm_mem_backing_src_alias *vm_mem_backing_src_alias(uint32_t i) { static const struct vm_mem_backing_src_alias aliases[] = { [VM_MEM_SRC_ANONYMOUS] = { .name = "anonymous", - .flag = 0, + .flag = ANON_FLAGS, }, [VM_MEM_SRC_ANONYMOUS_THP] = { .name = "anonymous_thp", - .flag = 0, + .flag = ANON_FLAGS, }, [VM_MEM_SRC_ANONYMOUS_HUGETLB] = { .name = "anonymous_hugetlb", - .flag = MAP_HUGETLB, + .flag = ANON_HUGE_FLAGS, }, [VM_MEM_SRC_ANONYMOUS_HUGETLB_16KB] = { .name = "anonymous_hugetlb_16kb", - .flag = MAP_HUGETLB | MAP_HUGE_16KB, + .flag = ANON_HUGE_FLAGS | MAP_HUGE_16KB, }, [VM_MEM_SRC_ANONYMOUS_HUGETLB_64KB] = { .name = "anonymous_hugetlb_64kb", - .flag = MAP_HUGETLB | MAP_HUGE_64KB, + .flag = ANON_HUGE_FLAGS | MAP_HUGE_64KB, }, [VM_MEM_SRC_ANONYMOUS_HUGETLB_512KB] = { .name = "anonymous_hugetlb_512kb", - .flag = MAP_HUGETLB | MAP_HUGE_512KB, + .flag = ANON_HUGE_FLAGS | MAP_HUGE_512KB, }, [VM_MEM_SRC_ANONYMOUS_HUGETLB_1MB] = { .name = "anonymous_hugetlb_1mb", - .flag = MAP_HUGETLB | MAP_HUGE_1MB, + .flag = ANON_HUGE_FLAGS | MAP_HUGE_1MB, }, [VM_MEM_SRC_ANONYMOUS_HUGETLB_2MB] = { .name = "anonymous_hugetlb_2mb", - .flag = MAP_HUGETLB | MAP_HUGE_2MB, + .flag = ANON_HUGE_FLAGS | MAP_HUGE_2MB, }, [VM_MEM_SRC_ANONYMOUS_HUGETLB_8MB] = { .name = "anonymous_hugetlb_8mb", - .flag = MAP_HUGETLB | MAP_HUGE_8MB, + .flag = ANON_HUGE_FLAGS | MAP_HUGE_8MB, }, [VM_MEM_SRC_ANONYMOUS_HUGETLB_16MB] = { .name = "anonymous_hugetlb_16mb", - .flag = MAP_HUGETLB | MAP_HUGE_16MB, + .flag = ANON_HUGE_FLAGS | MAP_HUGE_16MB, }, [VM_MEM_SRC_ANONYMOUS_HUGETLB_32MB] = { .name = "anonymous_hugetlb_32mb", - .flag = MAP_HUGETLB | MAP_HUGE_32MB, + .flag = ANON_HUGE_FLAGS | MAP_HUGE_32MB, }, [VM_MEM_SRC_ANONYMOUS_HUGETLB_256MB] = { .name = "anonymous_hugetlb_256mb", - .flag = MAP_HUGETLB | MAP_HUGE_256MB, + .flag = ANON_HUGE_FLAGS | MAP_HUGE_256MB, }, [VM_MEM_SRC_ANONYMOUS_HUGETLB_512MB] = { .name = "anonymous_hugetlb_512mb", - .flag = MAP_HUGETLB | MAP_HUGE_512MB, + .flag = ANON_HUGE_FLAGS | MAP_HUGE_512MB, }, [VM_MEM_SRC_ANONYMOUS_HUGETLB_1GB] = { .name = "anonymous_hugetlb_1gb", - .flag = MAP_HUGETLB | MAP_HUGE_1GB, + .flag = ANON_HUGE_FLAGS | MAP_HUGE_1GB, }, [VM_MEM_SRC_ANONYMOUS_HUGETLB_2GB] = { .name = "anonymous_hugetlb_2gb", - .flag = MAP_HUGETLB | MAP_HUGE_2GB, + .flag = ANON_HUGE_FLAGS | MAP_HUGE_2GB, }, [VM_MEM_SRC_ANONYMOUS_HUGETLB_16GB] = { .name = "anonymous_hugetlb_16gb", - .flag = MAP_HUGETLB | MAP_HUGE_16GB, + .flag = ANON_HUGE_FLAGS | MAP_HUGE_16GB, + }, + [VM_MEM_SRC_SHMEM] = { + .name = "shmem", + .flag = MAP_SHARED, + }, + [VM_MEM_SRC_SHARED_HUGETLB] = { + .name = "shared_hugetlb", + /* + * No MAP_HUGETLB, we use MFD_HUGETLB instead. Since + * we're using "file backed" memory, we need to specify + * this when the FD is created, not when the area is + * mapped. + */ + .flag = MAP_SHARED, }, }; _Static_assert(ARRAY_SIZE(aliases) == NUM_SRC_TYPES, @@ -250,10 +267,12 @@ size_t get_backing_src_pagesz(uint32_t i) switch (i) { case VM_MEM_SRC_ANONYMOUS: + case VM_MEM_SRC_SHMEM: return getpagesize(); case VM_MEM_SRC_ANONYMOUS_THP: return get_trans_hugepagesz(); case VM_MEM_SRC_ANONYMOUS_HUGETLB: + case VM_MEM_SRC_SHARED_HUGETLB: return get_def_hugetlb_pagesz(); default: return MAP_HUGE_PAGE_SIZE(flag); diff --git a/tools/testing/selftests/kvm/lib/x86_64/handlers.S b/tools/testing/selftests/kvm/lib/x86_64/handlers.S index aaf7bc7d2ce1..7629819734af 100644 --- a/tools/testing/selftests/kvm/lib/x86_64/handlers.S +++ b/tools/testing/selftests/kvm/lib/x86_64/handlers.S @@ -54,9 +54,9 @@ idt_handlers: .align 8 /* Fetch current address and append it to idt_handlers. */ - current_handler = . +666 : .pushsection .rodata -.quad current_handler + .quad 666b .popsection .if ! \has_error diff --git a/tools/testing/selftests/kvm/lib/x86_64/processor.c b/tools/testing/selftests/kvm/lib/x86_64/processor.c index a8906e60a108..efe235044421 100644 --- a/tools/testing/selftests/kvm/lib/x86_64/processor.c +++ b/tools/testing/selftests/kvm/lib/x86_64/processor.c @@ -657,9 +657,7 @@ struct kvm_cpuid2 *kvm_get_supported_cpuid(void) return cpuid; cpuid = allocate_kvm_cpuid2(); - kvm_fd = open(KVM_DEV_PATH, O_RDONLY); - if (kvm_fd < 0) - exit(KSFT_SKIP); + kvm_fd = open_kvm_dev_path_or_exit(); ret = ioctl(kvm_fd, KVM_GET_SUPPORTED_CPUID, cpuid); TEST_ASSERT(ret == 0, "KVM_GET_SUPPORTED_CPUID failed %d %d\n", @@ -691,9 +689,7 @@ uint64_t kvm_get_feature_msr(uint64_t msr_index) buffer.header.nmsrs = 1; buffer.entry.index = msr_index; - kvm_fd = open(KVM_DEV_PATH, O_RDONLY); - if (kvm_fd < 0) - exit(KSFT_SKIP); + kvm_fd = open_kvm_dev_path_or_exit(); r = ioctl(kvm_fd, KVM_GET_MSRS, &buffer.header); TEST_ASSERT(r == 1, "KVM_GET_MSRS IOCTL failed,\n" @@ -986,9 +982,7 @@ struct kvm_msr_list *kvm_get_msr_index_list(void) struct kvm_msr_list *list; int nmsrs, r, kvm_fd; - kvm_fd = open(KVM_DEV_PATH, O_RDONLY); - if (kvm_fd < 0) - exit(KSFT_SKIP); + kvm_fd = open_kvm_dev_path_or_exit(); nmsrs = kvm_get_num_msrs_fd(kvm_fd); list = malloc(sizeof(*list) + nmsrs * sizeof(list->indices[0])); @@ -1312,9 +1306,7 @@ struct kvm_cpuid2 *kvm_get_supported_hv_cpuid(void) return cpuid; cpuid = allocate_kvm_cpuid2(); - kvm_fd = open(KVM_DEV_PATH, O_RDONLY); - if (kvm_fd < 0) - exit(KSFT_SKIP); + kvm_fd = open_kvm_dev_path_or_exit(); ret = ioctl(kvm_fd, KVM_GET_SUPPORTED_HV_CPUID, cpuid); TEST_ASSERT(ret == 0, "KVM_GET_SUPPORTED_HV_CPUID failed %d %d\n", diff --git a/tools/testing/selftests/kvm/memslot_modification_stress_test.c b/tools/testing/selftests/kvm/memslot_modification_stress_test.c index 6096bf0a5b34..98351ba0933c 100644 --- a/tools/testing/selftests/kvm/memslot_modification_stress_test.c +++ b/tools/testing/selftests/kvm/memslot_modification_stress_test.c @@ -71,14 +71,22 @@ struct memslot_antagonist_args { }; static void add_remove_memslot(struct kvm_vm *vm, useconds_t delay, - uint64_t nr_modifications, uint64_t gpa) + uint64_t nr_modifications) { + const uint64_t pages = 1; + uint64_t gpa; int i; + /* + * Add the dummy memslot just below the perf_test_util memslot, which is + * at the top of the guest physical address space. + */ + gpa = guest_test_phys_mem - pages * vm_get_page_size(vm); + for (i = 0; i < nr_modifications; i++) { usleep(delay); vm_userspace_mem_region_add(vm, VM_MEM_SRC_ANONYMOUS, gpa, - DUMMY_MEMSLOT_INDEX, 1, 0); + DUMMY_MEMSLOT_INDEX, pages, 0); vm_mem_region_delete(vm, DUMMY_MEMSLOT_INDEX); } @@ -120,11 +128,7 @@ static void run_test(enum vm_guest_mode mode, void *arg) pr_info("Started all vCPUs\n"); add_remove_memslot(vm, p->memslot_modification_delay, - p->nr_memslot_modifications, - guest_test_phys_mem + - (guest_percpu_mem_size * nr_vcpus) + - perf_test_args.host_page_size + - perf_test_args.guest_page_size); + p->nr_memslot_modifications); run_vcpus = false; diff --git a/tools/testing/selftests/kvm/memslot_perf_test.c b/tools/testing/selftests/kvm/memslot_perf_test.c new file mode 100644 index 000000000000..11239652d805 --- /dev/null +++ b/tools/testing/selftests/kvm/memslot_perf_test.c @@ -0,0 +1,1037 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * A memslot-related performance benchmark. + * + * Copyright (C) 2021 Oracle and/or its affiliates. + * + * Basic guest setup / host vCPU thread code lifted from set_memory_region_test. + */ +#include <pthread.h> +#include <sched.h> +#include <semaphore.h> +#include <stdatomic.h> +#include <stdbool.h> +#include <stdint.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <sys/mman.h> +#include <time.h> +#include <unistd.h> + +#include <linux/compiler.h> + +#include <test_util.h> +#include <kvm_util.h> +#include <processor.h> + +#define VCPU_ID 0 + +#define MEM_SIZE ((512U << 20) + 4096) +#define MEM_SIZE_PAGES (MEM_SIZE / 4096) +#define MEM_GPA 0x10000000UL +#define MEM_AUX_GPA MEM_GPA +#define MEM_SYNC_GPA MEM_AUX_GPA +#define MEM_TEST_GPA (MEM_AUX_GPA + 4096) +#define MEM_TEST_SIZE (MEM_SIZE - 4096) +static_assert(MEM_SIZE % 4096 == 0, "invalid mem size"); +static_assert(MEM_TEST_SIZE % 4096 == 0, "invalid mem test size"); + +/* + * 32 MiB is max size that gets well over 100 iterations on 509 slots. + * Considering that each slot needs to have at least one page up to + * 8194 slots in use can then be tested (although with slightly + * limited resolution). + */ +#define MEM_SIZE_MAP ((32U << 20) + 4096) +#define MEM_SIZE_MAP_PAGES (MEM_SIZE_MAP / 4096) +#define MEM_TEST_MAP_SIZE (MEM_SIZE_MAP - 4096) +#define MEM_TEST_MAP_SIZE_PAGES (MEM_TEST_MAP_SIZE / 4096) +static_assert(MEM_SIZE_MAP % 4096 == 0, "invalid map test region size"); +static_assert(MEM_TEST_MAP_SIZE % 4096 == 0, "invalid map test region size"); +static_assert(MEM_TEST_MAP_SIZE_PAGES % 2 == 0, "invalid map test region size"); +static_assert(MEM_TEST_MAP_SIZE_PAGES > 2, "invalid map test region size"); + +/* + * 128 MiB is min size that fills 32k slots with at least one page in each + * while at the same time gets 100+ iterations in such test + */ +#define MEM_TEST_UNMAP_SIZE (128U << 20) +#define MEM_TEST_UNMAP_SIZE_PAGES (MEM_TEST_UNMAP_SIZE / 4096) +/* 2 MiB chunk size like a typical huge page */ +#define MEM_TEST_UNMAP_CHUNK_PAGES (2U << (20 - 12)) +static_assert(MEM_TEST_UNMAP_SIZE <= MEM_TEST_SIZE, + "invalid unmap test region size"); +static_assert(MEM_TEST_UNMAP_SIZE % 4096 == 0, + "invalid unmap test region size"); +static_assert(MEM_TEST_UNMAP_SIZE_PAGES % + (2 * MEM_TEST_UNMAP_CHUNK_PAGES) == 0, + "invalid unmap test region size"); + +/* + * For the move active test the middle of the test area is placed on + * a memslot boundary: half lies in the memslot being moved, half in + * other memslot(s). + * + * When running this test with 32k memslots (32764, really) each memslot + * contains 4 pages. + * The last one additionally contains the remaining 21 pages of memory, + * for the total size of 25 pages. + * Hence, the maximum size here is 50 pages. + */ +#define MEM_TEST_MOVE_SIZE_PAGES (50) +#define MEM_TEST_MOVE_SIZE (MEM_TEST_MOVE_SIZE_PAGES * 4096) +#define MEM_TEST_MOVE_GPA_DEST (MEM_GPA + MEM_SIZE) +static_assert(MEM_TEST_MOVE_SIZE <= MEM_TEST_SIZE, + "invalid move test region size"); + +#define MEM_TEST_VAL_1 0x1122334455667788 +#define MEM_TEST_VAL_2 0x99AABBCCDDEEFF00 + +struct vm_data { + struct kvm_vm *vm; + pthread_t vcpu_thread; + uint32_t nslots; + uint64_t npages; + uint64_t pages_per_slot; + void **hva_slots; + bool mmio_ok; + uint64_t mmio_gpa_min; + uint64_t mmio_gpa_max; +}; + +struct sync_area { + atomic_bool start_flag; + atomic_bool exit_flag; + atomic_bool sync_flag; + void *move_area_ptr; +}; + +/* + * Technically, we need also for the atomic bool to be address-free, which + * is recommended, but not strictly required, by C11 for lockless + * implementations. + * However, in practice both GCC and Clang fulfill this requirement on + * all KVM-supported platforms. + */ +static_assert(ATOMIC_BOOL_LOCK_FREE == 2, "atomic bool is not lockless"); + +static sem_t vcpu_ready; + +static bool map_unmap_verify; + +static bool verbose; +#define pr_info_v(...) \ + do { \ + if (verbose) \ + pr_info(__VA_ARGS__); \ + } while (0) + +static void *vcpu_worker(void *data) +{ + struct vm_data *vm = data; + struct kvm_run *run; + struct ucall uc; + uint64_t cmd; + + run = vcpu_state(vm->vm, VCPU_ID); + while (1) { + vcpu_run(vm->vm, VCPU_ID); + + if (run->exit_reason == KVM_EXIT_IO) { + cmd = get_ucall(vm->vm, VCPU_ID, &uc); + if (cmd != UCALL_SYNC) + break; + + sem_post(&vcpu_ready); + continue; + } + + if (run->exit_reason != KVM_EXIT_MMIO) + break; + + TEST_ASSERT(vm->mmio_ok, "Unexpected mmio exit"); + TEST_ASSERT(run->mmio.is_write, "Unexpected mmio read"); + TEST_ASSERT(run->mmio.len == 8, + "Unexpected exit mmio size = %u", run->mmio.len); + TEST_ASSERT(run->mmio.phys_addr >= vm->mmio_gpa_min && + run->mmio.phys_addr <= vm->mmio_gpa_max, + "Unexpected exit mmio address = 0x%llx", + run->mmio.phys_addr); + } + + if (run->exit_reason == KVM_EXIT_IO && cmd == UCALL_ABORT) + TEST_FAIL("%s at %s:%ld, val = %lu", (const char *)uc.args[0], + __FILE__, uc.args[1], uc.args[2]); + + return NULL; +} + +static void wait_for_vcpu(void) +{ + struct timespec ts; + + TEST_ASSERT(!clock_gettime(CLOCK_REALTIME, &ts), + "clock_gettime() failed: %d\n", errno); + + ts.tv_sec += 2; + TEST_ASSERT(!sem_timedwait(&vcpu_ready, &ts), + "sem_timedwait() failed: %d\n", errno); +} + +static void *vm_gpa2hva(struct vm_data *data, uint64_t gpa, uint64_t *rempages) +{ + uint64_t gpage, pgoffs; + uint32_t slot, slotoffs; + void *base; + + TEST_ASSERT(gpa >= MEM_GPA, "Too low gpa to translate"); + TEST_ASSERT(gpa < MEM_GPA + data->npages * 4096, + "Too high gpa to translate"); + gpa -= MEM_GPA; + + gpage = gpa / 4096; + pgoffs = gpa % 4096; + slot = min(gpage / data->pages_per_slot, (uint64_t)data->nslots - 1); + slotoffs = gpage - (slot * data->pages_per_slot); + + if (rempages) { + uint64_t slotpages; + + if (slot == data->nslots - 1) + slotpages = data->npages - slot * data->pages_per_slot; + else + slotpages = data->pages_per_slot; + + TEST_ASSERT(!pgoffs, + "Asking for remaining pages in slot but gpa not page aligned"); + *rempages = slotpages - slotoffs; + } + + base = data->hva_slots[slot]; + return (uint8_t *)base + slotoffs * 4096 + pgoffs; +} + +static uint64_t vm_slot2gpa(struct vm_data *data, uint32_t slot) +{ + TEST_ASSERT(slot < data->nslots, "Too high slot number"); + + return MEM_GPA + slot * data->pages_per_slot * 4096; +} + +static struct vm_data *alloc_vm(void) +{ + struct vm_data *data; + + data = malloc(sizeof(*data)); + TEST_ASSERT(data, "malloc(vmdata) failed"); + + data->vm = NULL; + data->hva_slots = NULL; + + return data; +} + +static bool prepare_vm(struct vm_data *data, int nslots, uint64_t *maxslots, + void *guest_code, uint64_t mempages, + struct timespec *slot_runtime) +{ + uint32_t max_mem_slots; + uint64_t rempages; + uint64_t guest_addr; + uint32_t slot; + struct timespec tstart; + struct sync_area *sync; + + max_mem_slots = kvm_check_cap(KVM_CAP_NR_MEMSLOTS); + TEST_ASSERT(max_mem_slots > 1, + "KVM_CAP_NR_MEMSLOTS should be greater than 1"); + TEST_ASSERT(nslots > 1 || nslots == -1, + "Slot count cap should be greater than 1"); + if (nslots != -1) + max_mem_slots = min(max_mem_slots, (uint32_t)nslots); + pr_info_v("Allowed number of memory slots: %"PRIu32"\n", max_mem_slots); + + TEST_ASSERT(mempages > 1, + "Can't test without any memory"); + + data->npages = mempages; + data->nslots = max_mem_slots - 1; + data->pages_per_slot = mempages / data->nslots; + if (!data->pages_per_slot) { + *maxslots = mempages + 1; + return false; + } + + rempages = mempages % data->nslots; + data->hva_slots = malloc(sizeof(*data->hva_slots) * data->nslots); + TEST_ASSERT(data->hva_slots, "malloc() fail"); + + data->vm = vm_create_default(VCPU_ID, mempages, guest_code); + + pr_info_v("Adding slots 1..%i, each slot with %"PRIu64" pages + %"PRIu64" extra pages last\n", + max_mem_slots - 1, data->pages_per_slot, rempages); + + clock_gettime(CLOCK_MONOTONIC, &tstart); + for (slot = 1, guest_addr = MEM_GPA; slot < max_mem_slots; slot++) { + uint64_t npages; + + npages = data->pages_per_slot; + if (slot == max_mem_slots - 1) + npages += rempages; + + vm_userspace_mem_region_add(data->vm, VM_MEM_SRC_ANONYMOUS, + guest_addr, slot, npages, + 0); + guest_addr += npages * 4096; + } + *slot_runtime = timespec_elapsed(tstart); + + for (slot = 0, guest_addr = MEM_GPA; slot < max_mem_slots - 1; slot++) { + uint64_t npages; + uint64_t gpa; + + npages = data->pages_per_slot; + if (slot == max_mem_slots - 2) + npages += rempages; + + gpa = vm_phy_pages_alloc(data->vm, npages, guest_addr, + slot + 1); + TEST_ASSERT(gpa == guest_addr, + "vm_phy_pages_alloc() failed\n"); + + data->hva_slots[slot] = addr_gpa2hva(data->vm, guest_addr); + memset(data->hva_slots[slot], 0, npages * 4096); + + guest_addr += npages * 4096; + } + + virt_map(data->vm, MEM_GPA, MEM_GPA, mempages, 0); + + sync = (typeof(sync))vm_gpa2hva(data, MEM_SYNC_GPA, NULL); + atomic_init(&sync->start_flag, false); + atomic_init(&sync->exit_flag, false); + atomic_init(&sync->sync_flag, false); + + data->mmio_ok = false; + + return true; +} + +static void launch_vm(struct vm_data *data) +{ + pr_info_v("Launching the test VM\n"); + + pthread_create(&data->vcpu_thread, NULL, vcpu_worker, data); + + /* Ensure the guest thread is spun up. */ + wait_for_vcpu(); +} + +static void free_vm(struct vm_data *data) +{ + kvm_vm_free(data->vm); + free(data->hva_slots); + free(data); +} + +static void wait_guest_exit(struct vm_data *data) +{ + pthread_join(data->vcpu_thread, NULL); +} + +static void let_guest_run(struct sync_area *sync) +{ + atomic_store_explicit(&sync->start_flag, true, memory_order_release); +} + +static void guest_spin_until_start(void) +{ + struct sync_area *sync = (typeof(sync))MEM_SYNC_GPA; + + while (!atomic_load_explicit(&sync->start_flag, memory_order_acquire)) + ; +} + +static void make_guest_exit(struct sync_area *sync) +{ + atomic_store_explicit(&sync->exit_flag, true, memory_order_release); +} + +static bool _guest_should_exit(void) +{ + struct sync_area *sync = (typeof(sync))MEM_SYNC_GPA; + + return atomic_load_explicit(&sync->exit_flag, memory_order_acquire); +} + +#define guest_should_exit() unlikely(_guest_should_exit()) + +/* + * noinline so we can easily see how much time the host spends waiting + * for the guest. + * For the same reason use alarm() instead of polling clock_gettime() + * to implement a wait timeout. + */ +static noinline void host_perform_sync(struct sync_area *sync) +{ + alarm(2); + + atomic_store_explicit(&sync->sync_flag, true, memory_order_release); + while (atomic_load_explicit(&sync->sync_flag, memory_order_acquire)) + ; + + alarm(0); +} + +static bool guest_perform_sync(void) +{ + struct sync_area *sync = (typeof(sync))MEM_SYNC_GPA; + bool expected; + + do { + if (guest_should_exit()) + return false; + + expected = true; + } while (!atomic_compare_exchange_weak_explicit(&sync->sync_flag, + &expected, false, + memory_order_acq_rel, + memory_order_relaxed)); + + return true; +} + +static void guest_code_test_memslot_move(void) +{ + struct sync_area *sync = (typeof(sync))MEM_SYNC_GPA; + uintptr_t base = (typeof(base))READ_ONCE(sync->move_area_ptr); + + GUEST_SYNC(0); + + guest_spin_until_start(); + + while (!guest_should_exit()) { + uintptr_t ptr; + + for (ptr = base; ptr < base + MEM_TEST_MOVE_SIZE; + ptr += 4096) + *(uint64_t *)ptr = MEM_TEST_VAL_1; + + /* + * No host sync here since the MMIO exits are so expensive + * that the host would spend most of its time waiting for + * the guest and so instead of measuring memslot move + * performance we would measure the performance and + * likelihood of MMIO exits + */ + } + + GUEST_DONE(); +} + +static void guest_code_test_memslot_map(void) +{ + struct sync_area *sync = (typeof(sync))MEM_SYNC_GPA; + + GUEST_SYNC(0); + + guest_spin_until_start(); + + while (1) { + uintptr_t ptr; + + for (ptr = MEM_TEST_GPA; + ptr < MEM_TEST_GPA + MEM_TEST_MAP_SIZE / 2; ptr += 4096) + *(uint64_t *)ptr = MEM_TEST_VAL_1; + + if (!guest_perform_sync()) + break; + + for (ptr = MEM_TEST_GPA + MEM_TEST_MAP_SIZE / 2; + ptr < MEM_TEST_GPA + MEM_TEST_MAP_SIZE; ptr += 4096) + *(uint64_t *)ptr = MEM_TEST_VAL_2; + + if (!guest_perform_sync()) + break; + } + + GUEST_DONE(); +} + +static void guest_code_test_memslot_unmap(void) +{ + struct sync_area *sync = (typeof(sync))MEM_SYNC_GPA; + + GUEST_SYNC(0); + + guest_spin_until_start(); + + while (1) { + uintptr_t ptr = MEM_TEST_GPA; + + /* + * We can afford to access (map) just a small number of pages + * per host sync as otherwise the host will spend + * a significant amount of its time waiting for the guest + * (instead of doing unmap operations), so this will + * effectively turn this test into a map performance test. + * + * Just access a single page to be on the safe side. + */ + *(uint64_t *)ptr = MEM_TEST_VAL_1; + + if (!guest_perform_sync()) + break; + + ptr += MEM_TEST_UNMAP_SIZE / 2; + *(uint64_t *)ptr = MEM_TEST_VAL_2; + + if (!guest_perform_sync()) + break; + } + + GUEST_DONE(); +} + +static void guest_code_test_memslot_rw(void) +{ + GUEST_SYNC(0); + + guest_spin_until_start(); + + while (1) { + uintptr_t ptr; + + for (ptr = MEM_TEST_GPA; + ptr < MEM_TEST_GPA + MEM_TEST_SIZE; ptr += 4096) + *(uint64_t *)ptr = MEM_TEST_VAL_1; + + if (!guest_perform_sync()) + break; + + for (ptr = MEM_TEST_GPA + 4096 / 2; + ptr < MEM_TEST_GPA + MEM_TEST_SIZE; ptr += 4096) { + uint64_t val = *(uint64_t *)ptr; + + GUEST_ASSERT_1(val == MEM_TEST_VAL_2, val); + *(uint64_t *)ptr = 0; + } + + if (!guest_perform_sync()) + break; + } + + GUEST_DONE(); +} + +static bool test_memslot_move_prepare(struct vm_data *data, + struct sync_area *sync, + uint64_t *maxslots, bool isactive) +{ + uint64_t movesrcgpa, movetestgpa; + + movesrcgpa = vm_slot2gpa(data, data->nslots - 1); + + if (isactive) { + uint64_t lastpages; + + vm_gpa2hva(data, movesrcgpa, &lastpages); + if (lastpages < MEM_TEST_MOVE_SIZE_PAGES / 2) { + *maxslots = 0; + return false; + } + } + + movetestgpa = movesrcgpa - (MEM_TEST_MOVE_SIZE / (isactive ? 2 : 1)); + sync->move_area_ptr = (void *)movetestgpa; + + if (isactive) { + data->mmio_ok = true; + data->mmio_gpa_min = movesrcgpa; + data->mmio_gpa_max = movesrcgpa + MEM_TEST_MOVE_SIZE / 2 - 1; + } + + return true; +} + +static bool test_memslot_move_prepare_active(struct vm_data *data, + struct sync_area *sync, + uint64_t *maxslots) +{ + return test_memslot_move_prepare(data, sync, maxslots, true); +} + +static bool test_memslot_move_prepare_inactive(struct vm_data *data, + struct sync_area *sync, + uint64_t *maxslots) +{ + return test_memslot_move_prepare(data, sync, maxslots, false); +} + +static void test_memslot_move_loop(struct vm_data *data, struct sync_area *sync) +{ + uint64_t movesrcgpa; + + movesrcgpa = vm_slot2gpa(data, data->nslots - 1); + vm_mem_region_move(data->vm, data->nslots - 1 + 1, + MEM_TEST_MOVE_GPA_DEST); + vm_mem_region_move(data->vm, data->nslots - 1 + 1, movesrcgpa); +} + +static void test_memslot_do_unmap(struct vm_data *data, + uint64_t offsp, uint64_t count) +{ + uint64_t gpa, ctr; + + for (gpa = MEM_TEST_GPA + offsp * 4096, ctr = 0; ctr < count; ) { + uint64_t npages; + void *hva; + int ret; + + hva = vm_gpa2hva(data, gpa, &npages); + TEST_ASSERT(npages, "Empty memory slot at gptr 0x%"PRIx64, gpa); + npages = min(npages, count - ctr); + ret = madvise(hva, npages * 4096, MADV_DONTNEED); + TEST_ASSERT(!ret, + "madvise(%p, MADV_DONTNEED) on VM memory should not fail for gptr 0x%"PRIx64, + hva, gpa); + ctr += npages; + gpa += npages * 4096; + } + TEST_ASSERT(ctr == count, + "madvise(MADV_DONTNEED) should exactly cover all of the requested area"); +} + +static void test_memslot_map_unmap_check(struct vm_data *data, + uint64_t offsp, uint64_t valexp) +{ + uint64_t gpa; + uint64_t *val; + + if (!map_unmap_verify) + return; + + gpa = MEM_TEST_GPA + offsp * 4096; + val = (typeof(val))vm_gpa2hva(data, gpa, NULL); + TEST_ASSERT(*val == valexp, + "Guest written values should read back correctly before unmap (%"PRIu64" vs %"PRIu64" @ %"PRIx64")", + *val, valexp, gpa); + *val = 0; +} + +static void test_memslot_map_loop(struct vm_data *data, struct sync_area *sync) +{ + /* + * Unmap the second half of the test area while guest writes to (maps) + * the first half. + */ + test_memslot_do_unmap(data, MEM_TEST_MAP_SIZE_PAGES / 2, + MEM_TEST_MAP_SIZE_PAGES / 2); + + /* + * Wait for the guest to finish writing the first half of the test + * area, verify the written value on the first and the last page of + * this area and then unmap it. + * Meanwhile, the guest is writing to (mapping) the second half of + * the test area. + */ + host_perform_sync(sync); + test_memslot_map_unmap_check(data, 0, MEM_TEST_VAL_1); + test_memslot_map_unmap_check(data, + MEM_TEST_MAP_SIZE_PAGES / 2 - 1, + MEM_TEST_VAL_1); + test_memslot_do_unmap(data, 0, MEM_TEST_MAP_SIZE_PAGES / 2); + + + /* + * Wait for the guest to finish writing the second half of the test + * area and verify the written value on the first and the last page + * of this area. + * The area will be unmapped at the beginning of the next loop + * iteration. + * Meanwhile, the guest is writing to (mapping) the first half of + * the test area. + */ + host_perform_sync(sync); + test_memslot_map_unmap_check(data, MEM_TEST_MAP_SIZE_PAGES / 2, + MEM_TEST_VAL_2); + test_memslot_map_unmap_check(data, MEM_TEST_MAP_SIZE_PAGES - 1, + MEM_TEST_VAL_2); +} + +static void test_memslot_unmap_loop_common(struct vm_data *data, + struct sync_area *sync, + uint64_t chunk) +{ + uint64_t ctr; + + /* + * Wait for the guest to finish mapping page(s) in the first half + * of the test area, verify the written value and then perform unmap + * of this area. + * Meanwhile, the guest is writing to (mapping) page(s) in the second + * half of the test area. + */ + host_perform_sync(sync); + test_memslot_map_unmap_check(data, 0, MEM_TEST_VAL_1); + for (ctr = 0; ctr < MEM_TEST_UNMAP_SIZE_PAGES / 2; ctr += chunk) + test_memslot_do_unmap(data, ctr, chunk); + + /* Likewise, but for the opposite host / guest areas */ + host_perform_sync(sync); + test_memslot_map_unmap_check(data, MEM_TEST_UNMAP_SIZE_PAGES / 2, + MEM_TEST_VAL_2); + for (ctr = MEM_TEST_UNMAP_SIZE_PAGES / 2; + ctr < MEM_TEST_UNMAP_SIZE_PAGES; ctr += chunk) + test_memslot_do_unmap(data, ctr, chunk); +} + +static void test_memslot_unmap_loop(struct vm_data *data, + struct sync_area *sync) +{ + test_memslot_unmap_loop_common(data, sync, 1); +} + +static void test_memslot_unmap_loop_chunked(struct vm_data *data, + struct sync_area *sync) +{ + test_memslot_unmap_loop_common(data, sync, MEM_TEST_UNMAP_CHUNK_PAGES); +} + +static void test_memslot_rw_loop(struct vm_data *data, struct sync_area *sync) +{ + uint64_t gptr; + + for (gptr = MEM_TEST_GPA + 4096 / 2; + gptr < MEM_TEST_GPA + MEM_TEST_SIZE; gptr += 4096) + *(uint64_t *)vm_gpa2hva(data, gptr, NULL) = MEM_TEST_VAL_2; + + host_perform_sync(sync); + + for (gptr = MEM_TEST_GPA; + gptr < MEM_TEST_GPA + MEM_TEST_SIZE; gptr += 4096) { + uint64_t *vptr = (typeof(vptr))vm_gpa2hva(data, gptr, NULL); + uint64_t val = *vptr; + + TEST_ASSERT(val == MEM_TEST_VAL_1, + "Guest written values should read back correctly (is %"PRIu64" @ %"PRIx64")", + val, gptr); + *vptr = 0; + } + + host_perform_sync(sync); +} + +struct test_data { + const char *name; + uint64_t mem_size; + void (*guest_code)(void); + bool (*prepare)(struct vm_data *data, struct sync_area *sync, + uint64_t *maxslots); + void (*loop)(struct vm_data *data, struct sync_area *sync); +}; + +static bool test_execute(int nslots, uint64_t *maxslots, + unsigned int maxtime, + const struct test_data *tdata, + uint64_t *nloops, + struct timespec *slot_runtime, + struct timespec *guest_runtime) +{ + uint64_t mem_size = tdata->mem_size ? : MEM_SIZE_PAGES; + struct vm_data *data; + struct sync_area *sync; + struct timespec tstart; + bool ret = true; + + data = alloc_vm(); + if (!prepare_vm(data, nslots, maxslots, tdata->guest_code, + mem_size, slot_runtime)) { + ret = false; + goto exit_free; + } + + sync = (typeof(sync))vm_gpa2hva(data, MEM_SYNC_GPA, NULL); + + if (tdata->prepare && + !tdata->prepare(data, sync, maxslots)) { + ret = false; + goto exit_free; + } + + launch_vm(data); + + clock_gettime(CLOCK_MONOTONIC, &tstart); + let_guest_run(sync); + + while (1) { + *guest_runtime = timespec_elapsed(tstart); + if (guest_runtime->tv_sec >= maxtime) + break; + + tdata->loop(data, sync); + + (*nloops)++; + } + + make_guest_exit(sync); + wait_guest_exit(data); + +exit_free: + free_vm(data); + + return ret; +} + +static const struct test_data tests[] = { + { + .name = "map", + .mem_size = MEM_SIZE_MAP_PAGES, + .guest_code = guest_code_test_memslot_map, + .loop = test_memslot_map_loop, + }, + { + .name = "unmap", + .mem_size = MEM_TEST_UNMAP_SIZE_PAGES + 1, + .guest_code = guest_code_test_memslot_unmap, + .loop = test_memslot_unmap_loop, + }, + { + .name = "unmap chunked", + .mem_size = MEM_TEST_UNMAP_SIZE_PAGES + 1, + .guest_code = guest_code_test_memslot_unmap, + .loop = test_memslot_unmap_loop_chunked, + }, + { + .name = "move active area", + .guest_code = guest_code_test_memslot_move, + .prepare = test_memslot_move_prepare_active, + .loop = test_memslot_move_loop, + }, + { + .name = "move inactive area", + .guest_code = guest_code_test_memslot_move, + .prepare = test_memslot_move_prepare_inactive, + .loop = test_memslot_move_loop, + }, + { + .name = "RW", + .guest_code = guest_code_test_memslot_rw, + .loop = test_memslot_rw_loop + }, +}; + +#define NTESTS ARRAY_SIZE(tests) + +struct test_args { + int tfirst; + int tlast; + int nslots; + int seconds; + int runs; +}; + +static void help(char *name, struct test_args *targs) +{ + int ctr; + + pr_info("usage: %s [-h] [-v] [-d] [-s slots] [-f first_test] [-e last_test] [-l test_length] [-r run_count]\n", + name); + pr_info(" -h: print this help screen.\n"); + pr_info(" -v: enable verbose mode (not for benchmarking).\n"); + pr_info(" -d: enable extra debug checks.\n"); + pr_info(" -s: specify memslot count cap (-1 means no cap; currently: %i)\n", + targs->nslots); + pr_info(" -f: specify the first test to run (currently: %i; max %zu)\n", + targs->tfirst, NTESTS - 1); + pr_info(" -e: specify the last test to run (currently: %i; max %zu)\n", + targs->tlast, NTESTS - 1); + pr_info(" -l: specify the test length in seconds (currently: %i)\n", + targs->seconds); + pr_info(" -r: specify the number of runs per test (currently: %i)\n", + targs->runs); + + pr_info("\nAvailable tests:\n"); + for (ctr = 0; ctr < NTESTS; ctr++) + pr_info("%d: %s\n", ctr, tests[ctr].name); +} + +static bool parse_args(int argc, char *argv[], + struct test_args *targs) +{ + int opt; + + while ((opt = getopt(argc, argv, "hvds:f:e:l:r:")) != -1) { + switch (opt) { + case 'h': + default: + help(argv[0], targs); + return false; + case 'v': + verbose = true; + break; + case 'd': + map_unmap_verify = true; + break; + case 's': + targs->nslots = atoi(optarg); + if (targs->nslots <= 0 && targs->nslots != -1) { + pr_info("Slot count cap has to be positive or -1 for no cap\n"); + return false; + } + break; + case 'f': + targs->tfirst = atoi(optarg); + if (targs->tfirst < 0) { + pr_info("First test to run has to be non-negative\n"); + return false; + } + break; + case 'e': + targs->tlast = atoi(optarg); + if (targs->tlast < 0 || targs->tlast >= NTESTS) { + pr_info("Last test to run has to be non-negative and less than %zu\n", + NTESTS); + return false; + } + break; + case 'l': + targs->seconds = atoi(optarg); + if (targs->seconds < 0) { + pr_info("Test length in seconds has to be non-negative\n"); + return false; + } + break; + case 'r': + targs->runs = atoi(optarg); + if (targs->runs <= 0) { + pr_info("Runs per test has to be positive\n"); + return false; + } + break; + } + } + + if (optind < argc) { + help(argv[0], targs); + return false; + } + + if (targs->tfirst > targs->tlast) { + pr_info("First test to run cannot be greater than the last test to run\n"); + return false; + } + + return true; +} + +struct test_result { + struct timespec slot_runtime, guest_runtime, iter_runtime; + int64_t slottimens, runtimens; + uint64_t nloops; +}; + +static bool test_loop(const struct test_data *data, + const struct test_args *targs, + struct test_result *rbestslottime, + struct test_result *rbestruntime) +{ + uint64_t maxslots; + struct test_result result; + + result.nloops = 0; + if (!test_execute(targs->nslots, &maxslots, targs->seconds, data, + &result.nloops, + &result.slot_runtime, &result.guest_runtime)) { + if (maxslots) + pr_info("Memslot count too high for this test, decrease the cap (max is %"PRIu64")\n", + maxslots); + else + pr_info("Memslot count may be too high for this test, try adjusting the cap\n"); + + return false; + } + + pr_info("Test took %ld.%.9lds for slot setup + %ld.%.9lds all iterations\n", + result.slot_runtime.tv_sec, result.slot_runtime.tv_nsec, + result.guest_runtime.tv_sec, result.guest_runtime.tv_nsec); + if (!result.nloops) { + pr_info("No full loops done - too short test time or system too loaded?\n"); + return true; + } + + result.iter_runtime = timespec_div(result.guest_runtime, + result.nloops); + pr_info("Done %"PRIu64" iterations, avg %ld.%.9lds each\n", + result.nloops, + result.iter_runtime.tv_sec, + result.iter_runtime.tv_nsec); + result.slottimens = timespec_to_ns(result.slot_runtime); + result.runtimens = timespec_to_ns(result.iter_runtime); + + /* + * Only rank the slot setup time for tests using the whole test memory + * area so they are comparable + */ + if (!data->mem_size && + (!rbestslottime->slottimens || + result.slottimens < rbestslottime->slottimens)) + *rbestslottime = result; + if (!rbestruntime->runtimens || + result.runtimens < rbestruntime->runtimens) + *rbestruntime = result; + + return true; +} + +int main(int argc, char *argv[]) +{ + struct test_args targs = { + .tfirst = 0, + .tlast = NTESTS - 1, + .nslots = -1, + .seconds = 5, + .runs = 1, + }; + struct test_result rbestslottime; + int tctr; + + /* Tell stdout not to buffer its content */ + setbuf(stdout, NULL); + + if (!parse_args(argc, argv, &targs)) + return -1; + + rbestslottime.slottimens = 0; + for (tctr = targs.tfirst; tctr <= targs.tlast; tctr++) { + const struct test_data *data = &tests[tctr]; + unsigned int runctr; + struct test_result rbestruntime; + + if (tctr > targs.tfirst) + pr_info("\n"); + + pr_info("Testing %s performance with %i runs, %d seconds each\n", + data->name, targs.runs, targs.seconds); + + rbestruntime.runtimens = 0; + for (runctr = 0; runctr < targs.runs; runctr++) + if (!test_loop(data, &targs, + &rbestslottime, &rbestruntime)) + break; + + if (rbestruntime.runtimens) + pr_info("Best runtime result was %ld.%.9lds per iteration (with %"PRIu64" iterations)\n", + rbestruntime.iter_runtime.tv_sec, + rbestruntime.iter_runtime.tv_nsec, + rbestruntime.nloops); + } + + if (rbestslottime.slottimens) + pr_info("Best slot setup time for the whole test area was %ld.%.9lds\n", + rbestslottime.slot_runtime.tv_sec, + rbestslottime.slot_runtime.tv_nsec); + + return 0; +} diff --git a/tools/testing/selftests/kvm/x86_64/evmcs_test.c b/tools/testing/selftests/kvm/x86_64/evmcs_test.c index ca22ee6d19cb..63096cea26c6 100644 --- a/tools/testing/selftests/kvm/x86_64/evmcs_test.c +++ b/tools/testing/selftests/kvm/x86_64/evmcs_test.c @@ -18,6 +18,28 @@ #include "vmx.h" #define VCPU_ID 5 +#define NMI_VECTOR 2 + +static int ud_count; + +void enable_x2apic(void) +{ + uint32_t spiv_reg = APIC_BASE_MSR + (APIC_SPIV >> 4); + + wrmsr(MSR_IA32_APICBASE, rdmsr(MSR_IA32_APICBASE) | + MSR_IA32_APICBASE_ENABLE | MSR_IA32_APICBASE_EXTD); + wrmsr(spiv_reg, rdmsr(spiv_reg) | APIC_SPIV_APIC_ENABLED); +} + +static void guest_ud_handler(struct ex_regs *regs) +{ + ud_count++; + regs->rip += 3; /* VMLAUNCH */ +} + +static void guest_nmi_handler(struct ex_regs *regs) +{ +} void l2_guest_code(void) { @@ -25,15 +47,23 @@ void l2_guest_code(void) GUEST_SYNC(8); + /* Forced exit to L1 upon restore */ + GUEST_SYNC(9); + /* Done, exit to L1 and never come back. */ vmcall(); } -void l1_guest_code(struct vmx_pages *vmx_pages) +void guest_code(struct vmx_pages *vmx_pages) { #define L2_GUEST_STACK_SIZE 64 unsigned long l2_guest_stack[L2_GUEST_STACK_SIZE]; + enable_x2apic(); + + GUEST_SYNC(1); + GUEST_SYNC(2); + enable_vp_assist(vmx_pages->vp_assist_gpa, vmx_pages->vp_assist); GUEST_ASSERT(vmx_pages->vmcs_gpa); @@ -55,27 +85,40 @@ void l1_guest_code(struct vmx_pages *vmx_pages) current_evmcs->revision_id = EVMCS_VERSION; GUEST_SYNC(6); + current_evmcs->pin_based_vm_exec_control |= + PIN_BASED_NMI_EXITING; GUEST_ASSERT(!vmlaunch()); GUEST_ASSERT(vmptrstz() == vmx_pages->enlightened_vmcs_gpa); - GUEST_SYNC(9); + + /* + * NMI forces L2->L1 exit, resuming L2 and hope that EVMCS is + * up-to-date (RIP points where it should and not at the beginning + * of l2_guest_code(). GUEST_SYNC(9) checkes that. + */ GUEST_ASSERT(!vmresume()); - GUEST_ASSERT(vmreadz(VM_EXIT_REASON) == EXIT_REASON_VMCALL); + GUEST_SYNC(10); + + GUEST_ASSERT(vmreadz(VM_EXIT_REASON) == EXIT_REASON_VMCALL); + GUEST_SYNC(11); + + /* Try enlightened vmptrld with an incorrect GPA */ + evmcs_vmptrld(0xdeadbeef, vmx_pages->enlightened_vmcs); + GUEST_ASSERT(vmlaunch()); + GUEST_ASSERT(ud_count == 1); + GUEST_DONE(); } -void guest_code(struct vmx_pages *vmx_pages) +void inject_nmi(struct kvm_vm *vm) { - GUEST_SYNC(1); - GUEST_SYNC(2); + struct kvm_vcpu_events events; - if (vmx_pages) - l1_guest_code(vmx_pages); + vcpu_events_get(vm, VCPU_ID, &events); - GUEST_DONE(); + events.nmi.pending = 1; + events.flags |= KVM_VCPUEVENT_VALID_NMI_PENDING; - /* Try enlightened vmptrld with an incorrect GPA */ - evmcs_vmptrld(0xdeadbeef, vmx_pages->enlightened_vmcs); - GUEST_ASSERT(vmlaunch()); + vcpu_events_set(vm, VCPU_ID, &events); } int main(int argc, char *argv[]) @@ -109,6 +152,13 @@ int main(int argc, char *argv[]) vcpu_alloc_vmx(vm, &vmx_pages_gva); vcpu_args_set(vm, VCPU_ID, 1, vmx_pages_gva); + vm_init_descriptor_tables(vm); + vcpu_init_descriptor_tables(vm, VCPU_ID); + vm_handle_exception(vm, UD_VECTOR, guest_ud_handler); + vm_handle_exception(vm, NMI_VECTOR, guest_nmi_handler); + + pr_info("Running L1 which uses EVMCS to run L2\n"); + for (stage = 1;; stage++) { _vcpu_run(vm, VCPU_ID); TEST_ASSERT(run->exit_reason == KVM_EXIT_IO, @@ -124,7 +174,7 @@ int main(int argc, char *argv[]) case UCALL_SYNC: break; case UCALL_DONE: - goto part1_done; + goto done; default: TEST_FAIL("Unknown ucall %lu", uc.cmd); } @@ -154,12 +204,14 @@ int main(int argc, char *argv[]) TEST_ASSERT(!memcmp(®s1, ®s2, sizeof(regs2)), "Unexpected register values after vcpu_load_state; rdi: %lx rsi: %lx", (ulong) regs2.rdi, (ulong) regs2.rsi); - } -part1_done: - _vcpu_run(vm, VCPU_ID); - TEST_ASSERT(run->exit_reason == KVM_EXIT_SHUTDOWN, - "Unexpected successful VMEnter with invalid eVMCS pointer!"); + /* Force immediate L2->L1 exit before resuming */ + if (stage == 8) { + pr_info("Injecting NMI into L1 before L2 had a chance to run after restore\n"); + inject_nmi(vm); + } + } +done: kvm_vm_free(vm); } diff --git a/tools/testing/selftests/kvm/x86_64/get_cpuid_test.c b/tools/testing/selftests/kvm/x86_64/get_cpuid_test.c index 9b78e8889638..8c77537af5a1 100644 --- a/tools/testing/selftests/kvm/x86_64/get_cpuid_test.c +++ b/tools/testing/selftests/kvm/x86_64/get_cpuid_test.c @@ -19,7 +19,12 @@ struct { u32 function; u32 index; } mangled_cpuids[] = { + /* + * These entries depend on the vCPU's XCR0 register and IA32_XSS MSR, + * which are not controlled for by this test. + */ {.function = 0xd, .index = 0}, + {.function = 0xd, .index = 1}, }; static void test_guest_cpuids(struct kvm_cpuid2 *guest_cpuid) diff --git a/tools/testing/selftests/kvm/x86_64/get_msr_index_features.c b/tools/testing/selftests/kvm/x86_64/get_msr_index_features.c index cb953df4d7d0..8aed0db1331d 100644 --- a/tools/testing/selftests/kvm/x86_64/get_msr_index_features.c +++ b/tools/testing/selftests/kvm/x86_64/get_msr_index_features.c @@ -37,9 +37,7 @@ static void test_get_msr_index(void) int old_res, res, kvm_fd, r; struct kvm_msr_list *list; - kvm_fd = open(KVM_DEV_PATH, O_RDONLY); - if (kvm_fd < 0) - exit(KSFT_SKIP); + kvm_fd = open_kvm_dev_path_or_exit(); old_res = kvm_num_index_msrs(kvm_fd, 0); TEST_ASSERT(old_res != 0, "Expecting nmsrs to be > 0"); @@ -101,9 +99,7 @@ static void test_get_msr_feature(void) int res, old_res, i, kvm_fd; struct kvm_msr_list *feature_list; - kvm_fd = open(KVM_DEV_PATH, O_RDONLY); - if (kvm_fd < 0) - exit(KSFT_SKIP); + kvm_fd = open_kvm_dev_path_or_exit(); old_res = kvm_num_feature_msrs(kvm_fd, 0); TEST_ASSERT(old_res != 0, "Expecting nmsrs to be > 0"); diff --git a/tools/testing/selftests/nci/.gitignore b/tools/testing/selftests/nci/.gitignore new file mode 100644 index 000000000000..448eeb4590fc --- /dev/null +++ b/tools/testing/selftests/nci/.gitignore @@ -0,0 +1 @@ +/nci_dev diff --git a/tools/testing/selftests/net/.gitignore b/tools/testing/selftests/net/.gitignore index 61ae899cfc17..19deb9cdf72f 100644 --- a/tools/testing/selftests/net/.gitignore +++ b/tools/testing/selftests/net/.gitignore @@ -30,3 +30,4 @@ hwtstamp_config rxtimestamp timestamping txtimestamp +so_netns_cookie diff --git a/tools/testing/selftests/net/Makefile b/tools/testing/selftests/net/Makefile index 3915bb7bfc39..79c9eb0034d5 100644 --- a/tools/testing/selftests/net/Makefile +++ b/tools/testing/selftests/net/Makefile @@ -30,7 +30,7 @@ TEST_GEN_FILES = socket nettest TEST_GEN_FILES += psock_fanout psock_tpacket msg_zerocopy reuseport_addr_any TEST_GEN_FILES += tcp_mmap tcp_inq psock_snd txring_overwrite TEST_GEN_FILES += udpgso udpgso_bench_tx udpgso_bench_rx ip_defrag -TEST_GEN_FILES += so_txtime ipv6_flowlabel ipv6_flowlabel_mgr +TEST_GEN_FILES += so_txtime ipv6_flowlabel ipv6_flowlabel_mgr so_netns_cookie TEST_GEN_FILES += tcp_fastopen_backup_key TEST_GEN_FILES += fin_ack_lat TEST_GEN_FILES += reuseaddr_ports_exhausted diff --git a/tools/testing/selftests/net/config b/tools/testing/selftests/net/config index 614d5477365a..6f905b53904f 100644 --- a/tools/testing/selftests/net/config +++ b/tools/testing/selftests/net/config @@ -1,4 +1,5 @@ CONFIG_USER_NS=y +CONFIG_NET_NS=y CONFIG_BPF_SYSCALL=y CONFIG_TEST_BPF=m CONFIG_NUMA=y diff --git a/tools/testing/selftests/net/devlink_port_split.py b/tools/testing/selftests/net/devlink_port_split.py index 834066d465fc..2b5d6ff87373 100755 --- a/tools/testing/selftests/net/devlink_port_split.py +++ b/tools/testing/selftests/net/devlink_port_split.py @@ -18,6 +18,8 @@ import sys # +# Kselftest framework requirement - SKIP code is 4 +KSFT_SKIP=4 Port = collections.namedtuple('Port', 'bus_info name') @@ -239,7 +241,11 @@ def main(cmdline=None): assert stderr == "" devs = json.loads(stdout)['dev'] - dev = list(devs.keys())[0] + if devs: + dev = list(devs.keys())[0] + else: + print("no devlink device was found, test skipped") + sys.exit(KSFT_SKIP) cmd = "devlink dev show %s" % dev stdout, stderr = run_command(cmd) diff --git a/tools/testing/selftests/net/fib_nexthops.sh b/tools/testing/selftests/net/fib_nexthops.sh index 49774a8a7736..0d293391e9a4 100755 --- a/tools/testing/selftests/net/fib_nexthops.sh +++ b/tools/testing/selftests/net/fib_nexthops.sh @@ -925,6 +925,14 @@ ipv6_fcnal_runtime() run_cmd "$IP nexthop add id 86 via 2001:db8:91::2 dev veth1" run_cmd "$IP ro add 2001:db8:101::1/128 nhid 81" + # route can not use prefsrc with nexthops + run_cmd "$IP ro add 2001:db8:101::2/128 nhid 86 from 2001:db8:91::1" + log_test $? 2 "IPv6 route can not use src routing with external nexthop" + + # check cleanup path on invalid metric + run_cmd "$IP ro add 2001:db8:101::2/128 nhid 86 congctl lock foo" + log_test $? 2 "IPv6 route with invalid metric" + # rpfilter and default route $IP nexthop flush >/dev/null 2>&1 run_cmd "ip netns exec me ip6tables -t mangle -I PREROUTING 1 -m rpfilter --invert -j DROP" @@ -1366,6 +1374,10 @@ ipv4_fcnal_runtime() run_cmd "$IP nexthop replace id 22 via 172.16.2.2 dev veth3" log_test $? 2 "Nexthop replace with invalid scope for existing route" + # check cleanup path on invalid metric + run_cmd "$IP ro add 172.16.101.2/32 nhid 22 congctl lock foo" + log_test $? 2 "IPv4 route with invalid metric" + # # add route with nexthop and check traffic # diff --git a/tools/testing/selftests/net/fib_tests.sh b/tools/testing/selftests/net/fib_tests.sh index 76d9487fb03c..5abe92d55b69 100755 --- a/tools/testing/selftests/net/fib_tests.sh +++ b/tools/testing/selftests/net/fib_tests.sh @@ -1384,12 +1384,37 @@ ipv4_rt_replace() ipv4_rt_replace_mpath } +# checks that cached input route on VRF port is deleted +# when VRF is deleted +ipv4_local_rt_cache() +{ + run_cmd "ip addr add 10.0.0.1/32 dev lo" + run_cmd "ip netns add test-ns" + run_cmd "ip link add veth-outside type veth peer name veth-inside" + run_cmd "ip link add vrf-100 type vrf table 1100" + run_cmd "ip link set veth-outside master vrf-100" + run_cmd "ip link set veth-inside netns test-ns" + run_cmd "ip link set veth-outside up" + run_cmd "ip link set vrf-100 up" + run_cmd "ip route add 10.1.1.1/32 dev veth-outside table 1100" + run_cmd "ip netns exec test-ns ip link set veth-inside up" + run_cmd "ip netns exec test-ns ip addr add 10.1.1.1/32 dev veth-inside" + run_cmd "ip netns exec test-ns ip route add 10.0.0.1/32 dev veth-inside" + run_cmd "ip netns exec test-ns ip route add default via 10.0.0.1" + run_cmd "ip netns exec test-ns ping 10.0.0.1 -c 1 -i 1" + run_cmd "ip link delete vrf-100" + + # if we do not hang test is a success + log_test $? 0 "Cached route removed from VRF port device" +} + ipv4_route_test() { route_setup ipv4_rt_add ipv4_rt_replace + ipv4_local_rt_cache route_cleanup } diff --git a/tools/testing/selftests/net/forwarding/custom_multipath_hash.sh b/tools/testing/selftests/net/forwarding/custom_multipath_hash.sh new file mode 100755 index 000000000000..a15d21dc035a --- /dev/null +++ b/tools/testing/selftests/net/forwarding/custom_multipath_hash.sh @@ -0,0 +1,364 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 +# +# Test traffic distribution between two paths when using custom hash policy. +# +# +--------------------------------+ +# | H1 | +# | $h1 + | +# | 198.51.100.{2-253}/24 | | +# | 2001:db8:1::{2-fd}/64 | | +# +-------------------------|------+ +# | +# +-------------------------|-------------------------+ +# | SW1 | | +# | $rp1 + | +# | 198.51.100.1/24 | +# | 2001:db8:1::1/64 | +# | | +# | | +# | $rp11 + + $rp12 | +# | 192.0.2.1/28 | | 192.0.2.17/28 | +# | 2001:db8:2::1/64 | | 2001:db8:3::1/64 | +# +------------------|-------------|------------------+ +# | | +# +------------------|-------------|------------------+ +# | SW2 | | | +# | | | | +# | $rp21 + + $rp22 | +# | 192.0.2.2/28 192.0.2.18/28 | +# | 2001:db8:2::2/64 2001:db8:3::2/64 | +# | | +# | | +# | $rp2 + | +# | 203.0.113.1/24 | | +# | 2001:db8:4::1/64 | | +# +-------------------------|-------------------------+ +# | +# +-------------------------|------+ +# | H2 | | +# | $h2 + | +# | 203.0.113.{2-253}/24 | +# | 2001:db8:4::{2-fd}/64 | +# +--------------------------------+ + +ALL_TESTS=" + ping_ipv4 + ping_ipv6 + custom_hash +" + +NUM_NETIFS=8 +source lib.sh + +h1_create() +{ + simple_if_init $h1 198.51.100.2/24 2001:db8:1::2/64 + ip route add vrf v$h1 default via 198.51.100.1 dev $h1 + ip -6 route add vrf v$h1 default via 2001:db8:1::1 dev $h1 +} + +h1_destroy() +{ + ip -6 route del vrf v$h1 default + ip route del vrf v$h1 default + simple_if_fini $h1 198.51.100.2/24 2001:db8:1::2/64 +} + +sw1_create() +{ + simple_if_init $rp1 198.51.100.1/24 2001:db8:1::1/64 + __simple_if_init $rp11 v$rp1 192.0.2.1/28 2001:db8:2::1/64 + __simple_if_init $rp12 v$rp1 192.0.2.17/28 2001:db8:3::1/64 + + ip route add vrf v$rp1 203.0.113.0/24 \ + nexthop via 192.0.2.2 dev $rp11 \ + nexthop via 192.0.2.18 dev $rp12 + + ip -6 route add vrf v$rp1 2001:db8:4::/64 \ + nexthop via 2001:db8:2::2 dev $rp11 \ + nexthop via 2001:db8:3::2 dev $rp12 +} + +sw1_destroy() +{ + ip -6 route del vrf v$rp1 2001:db8:4::/64 + + ip route del vrf v$rp1 203.0.113.0/24 + + __simple_if_fini $rp12 192.0.2.17/28 2001:db8:3::1/64 + __simple_if_fini $rp11 192.0.2.1/28 2001:db8:2::1/64 + simple_if_fini $rp1 198.51.100.1/24 2001:db8:1::1/64 +} + +sw2_create() +{ + simple_if_init $rp2 203.0.113.1/24 2001:db8:4::1/64 + __simple_if_init $rp21 v$rp2 192.0.2.2/28 2001:db8:2::2/64 + __simple_if_init $rp22 v$rp2 192.0.2.18/28 2001:db8:3::2/64 + + ip route add vrf v$rp2 198.51.100.0/24 \ + nexthop via 192.0.2.1 dev $rp21 \ + nexthop via 192.0.2.17 dev $rp22 + + ip -6 route add vrf v$rp2 2001:db8:1::/64 \ + nexthop via 2001:db8:2::1 dev $rp21 \ + nexthop via 2001:db8:3::1 dev $rp22 +} + +sw2_destroy() +{ + ip -6 route del vrf v$rp2 2001:db8:1::/64 + + ip route del vrf v$rp2 198.51.100.0/24 + + __simple_if_fini $rp22 192.0.2.18/28 2001:db8:3::2/64 + __simple_if_fini $rp21 192.0.2.2/28 2001:db8:2::2/64 + simple_if_fini $rp2 203.0.113.1/24 2001:db8:4::1/64 +} + +h2_create() +{ + simple_if_init $h2 203.0.113.2/24 2001:db8:4::2/64 + ip route add vrf v$h2 default via 203.0.113.1 dev $h2 + ip -6 route add vrf v$h2 default via 2001:db8:4::1 dev $h2 +} + +h2_destroy() +{ + ip -6 route del vrf v$h2 default + ip route del vrf v$h2 default + simple_if_fini $h2 203.0.113.2/24 2001:db8:4::2/64 +} + +setup_prepare() +{ + h1=${NETIFS[p1]} + + rp1=${NETIFS[p2]} + + rp11=${NETIFS[p3]} + rp21=${NETIFS[p4]} + + rp12=${NETIFS[p5]} + rp22=${NETIFS[p6]} + + rp2=${NETIFS[p7]} + + h2=${NETIFS[p8]} + + vrf_prepare + h1_create + sw1_create + sw2_create + h2_create + + forwarding_enable +} + +cleanup() +{ + pre_cleanup + + forwarding_restore + + h2_destroy + sw2_destroy + sw1_destroy + h1_destroy + vrf_cleanup +} + +ping_ipv4() +{ + ping_test $h1 203.0.113.2 +} + +ping_ipv6() +{ + ping6_test $h1 2001:db8:4::2 +} + +send_src_ipv4() +{ + $MZ $h1 -q -p 64 -A "198.51.100.2-198.51.100.253" -B 203.0.113.2 \ + -d 1msec -c 50 -t udp "sp=20000,dp=30000" +} + +send_dst_ipv4() +{ + $MZ $h1 -q -p 64 -A 198.51.100.2 -B "203.0.113.2-203.0.113.253" \ + -d 1msec -c 50 -t udp "sp=20000,dp=30000" +} + +send_src_udp4() +{ + $MZ $h1 -q -p 64 -A 198.51.100.2 -B 203.0.113.2 \ + -d 1msec -t udp "sp=0-32768,dp=30000" +} + +send_dst_udp4() +{ + $MZ $h1 -q -p 64 -A 198.51.100.2 -B 203.0.113.2 \ + -d 1msec -t udp "sp=20000,dp=0-32768" +} + +send_src_ipv6() +{ + $MZ -6 $h1 -q -p 64 -A "2001:db8:1::2-2001:db8:1::fd" -B 2001:db8:4::2 \ + -d 1msec -c 50 -t udp "sp=20000,dp=30000" +} + +send_dst_ipv6() +{ + $MZ -6 $h1 -q -p 64 -A 2001:db8:1::2 -B "2001:db8:4::2-2001:db8:4::fd" \ + -d 1msec -c 50 -t udp "sp=20000,dp=30000" +} + +send_flowlabel() +{ + # Generate 16384 echo requests, each with a random flow label. + for _ in $(seq 1 16384); do + ip vrf exec v$h1 \ + $PING6 2001:db8:4::2 -F 0 -c 1 -q >/dev/null 2>&1 + done +} + +send_src_udp6() +{ + $MZ -6 $h1 -q -p 64 -A 2001:db8:1::2 -B 2001:db8:4::2 \ + -d 1msec -t udp "sp=0-32768,dp=30000" +} + +send_dst_udp6() +{ + $MZ -6 $h1 -q -p 64 -A 2001:db8:1::2 -B 2001:db8:4::2 \ + -d 1msec -t udp "sp=20000,dp=0-32768" +} + +custom_hash_test() +{ + local field="$1"; shift + local balanced="$1"; shift + local send_flows="$@" + + RET=0 + + local t0_rp11=$(link_stats_tx_packets_get $rp11) + local t0_rp12=$(link_stats_tx_packets_get $rp12) + + $send_flows + + local t1_rp11=$(link_stats_tx_packets_get $rp11) + local t1_rp12=$(link_stats_tx_packets_get $rp12) + + local d_rp11=$((t1_rp11 - t0_rp11)) + local d_rp12=$((t1_rp12 - t0_rp12)) + + local diff=$((d_rp12 - d_rp11)) + local sum=$((d_rp11 + d_rp12)) + + local pct=$(echo "$diff / $sum * 100" | bc -l) + local is_balanced=$(echo "-20 <= $pct && $pct <= 20" | bc) + + [[ ( $is_balanced -eq 1 && $balanced == "balanced" ) || + ( $is_balanced -eq 0 && $balanced == "unbalanced" ) ]] + check_err $? "Expected traffic to be $balanced, but it is not" + + log_test "Multipath hash field: $field ($balanced)" + log_info "Packets sent on path1 / path2: $d_rp11 / $d_rp12" +} + +custom_hash_v4() +{ + log_info "Running IPv4 custom multipath hash tests" + + sysctl_set net.ipv4.fib_multipath_hash_policy 3 + + # Prevent the neighbour table from overflowing, as different neighbour + # entries will be created on $ol4 when using different destination IPs. + sysctl_set net.ipv4.neigh.default.gc_thresh1 1024 + sysctl_set net.ipv4.neigh.default.gc_thresh2 1024 + sysctl_set net.ipv4.neigh.default.gc_thresh3 1024 + + sysctl_set net.ipv4.fib_multipath_hash_fields 0x0001 + custom_hash_test "Source IP" "balanced" send_src_ipv4 + custom_hash_test "Source IP" "unbalanced" send_dst_ipv4 + + sysctl_set net.ipv4.fib_multipath_hash_fields 0x0002 + custom_hash_test "Destination IP" "balanced" send_dst_ipv4 + custom_hash_test "Destination IP" "unbalanced" send_src_ipv4 + + sysctl_set net.ipv4.fib_multipath_hash_fields 0x0010 + custom_hash_test "Source port" "balanced" send_src_udp4 + custom_hash_test "Source port" "unbalanced" send_dst_udp4 + + sysctl_set net.ipv4.fib_multipath_hash_fields 0x0020 + custom_hash_test "Destination port" "balanced" send_dst_udp4 + custom_hash_test "Destination port" "unbalanced" send_src_udp4 + + sysctl_restore net.ipv4.neigh.default.gc_thresh3 + sysctl_restore net.ipv4.neigh.default.gc_thresh2 + sysctl_restore net.ipv4.neigh.default.gc_thresh1 + + sysctl_restore net.ipv4.fib_multipath_hash_policy +} + +custom_hash_v6() +{ + log_info "Running IPv6 custom multipath hash tests" + + sysctl_set net.ipv6.fib_multipath_hash_policy 3 + + # Prevent the neighbour table from overflowing, as different neighbour + # entries will be created on $ol4 when using different destination IPs. + sysctl_set net.ipv6.neigh.default.gc_thresh1 1024 + sysctl_set net.ipv6.neigh.default.gc_thresh2 1024 + sysctl_set net.ipv6.neigh.default.gc_thresh3 1024 + + sysctl_set net.ipv6.fib_multipath_hash_fields 0x0001 + custom_hash_test "Source IP" "balanced" send_src_ipv6 + custom_hash_test "Source IP" "unbalanced" send_dst_ipv6 + + sysctl_set net.ipv6.fib_multipath_hash_fields 0x0002 + custom_hash_test "Destination IP" "balanced" send_dst_ipv6 + custom_hash_test "Destination IP" "unbalanced" send_src_ipv6 + + sysctl_set net.ipv6.fib_multipath_hash_fields 0x0008 + custom_hash_test "Flowlabel" "balanced" send_flowlabel + custom_hash_test "Flowlabel" "unbalanced" send_src_ipv6 + + sysctl_set net.ipv6.fib_multipath_hash_fields 0x0010 + custom_hash_test "Source port" "balanced" send_src_udp6 + custom_hash_test "Source port" "unbalanced" send_dst_udp6 + + sysctl_set net.ipv6.fib_multipath_hash_fields 0x0020 + custom_hash_test "Destination port" "balanced" send_dst_udp6 + custom_hash_test "Destination port" "unbalanced" send_src_udp6 + + sysctl_restore net.ipv6.neigh.default.gc_thresh3 + sysctl_restore net.ipv6.neigh.default.gc_thresh2 + sysctl_restore net.ipv6.neigh.default.gc_thresh1 + + sysctl_restore net.ipv6.fib_multipath_hash_policy +} + +custom_hash() +{ + # Test that when the hash policy is set to custom, traffic is + # distributed only according to the fields set in the + # fib_multipath_hash_fields sysctl. + # + # Each time set a different field and make sure traffic is only + # distributed when the field is changed in the packet stream. + custom_hash_v4 + custom_hash_v6 +} + +trap cleanup EXIT + +setup_prepare +setup_wait +tests_run + +exit $EXIT_STATUS diff --git a/tools/testing/selftests/net/forwarding/devlink_lib.sh b/tools/testing/selftests/net/forwarding/devlink_lib.sh index 9c12c4fd3afc..13d3d4428a32 100644 --- a/tools/testing/selftests/net/forwarding/devlink_lib.sh +++ b/tools/testing/selftests/net/forwarding/devlink_lib.sh @@ -18,6 +18,12 @@ if [[ ! -v DEVLINK_DEV ]]; then DEVLINK_VIDDID=$(lspci -s $(echo $DEVLINK_DEV | cut -d"/" -f2) \ -n | cut -d" " -f3) +elif [[ ! -z "$DEVLINK_DEV" ]]; then + devlink dev show $DEVLINK_DEV &> /dev/null + if [ $? -ne 0 ]; then + echo "SKIP: devlink device \"$DEVLINK_DEV\" not found" + exit 1 + fi fi ############################################################################## @@ -318,6 +324,14 @@ devlink_trap_rx_bytes_get() | jq '.[][][]["stats"]["rx"]["bytes"]' } +devlink_trap_drop_packets_get() +{ + local trap_name=$1; shift + + devlink -js trap show $DEVLINK_DEV trap $trap_name \ + | jq '.[][][]["stats"]["rx"]["dropped"]' +} + devlink_trap_stats_idle_test() { local trap_name=$1; shift @@ -339,6 +353,24 @@ devlink_trap_stats_idle_test() fi } +devlink_trap_drop_stats_idle_test() +{ + local trap_name=$1; shift + local t0_packets t0_bytes + + t0_packets=$(devlink_trap_drop_packets_get $trap_name) + + sleep 1 + + t1_packets=$(devlink_trap_drop_packets_get $trap_name) + + if [[ $t0_packets -eq $t1_packets ]]; then + return 0 + else + return 1 + fi +} + devlink_traps_enable_all() { local trap_name diff --git a/tools/testing/selftests/net/forwarding/gre_custom_multipath_hash.sh b/tools/testing/selftests/net/forwarding/gre_custom_multipath_hash.sh new file mode 100755 index 000000000000..a73f52efcb6c --- /dev/null +++ b/tools/testing/selftests/net/forwarding/gre_custom_multipath_hash.sh @@ -0,0 +1,456 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 +# +# Test traffic distribution when there are multiple paths between an IPv4 GRE +# tunnel. The tunnel carries IPv4 and IPv6 traffic between multiple hosts. +# Multiple routes are in the underlay network. With the default multipath +# policy, SW2 will only look at the outer IP addresses, hence only a single +# route would be used. +# +# +--------------------------------+ +# | H1 | +# | $h1 + | +# | 198.51.100.{2-253}/24 | | +# | 2001:db8:1::{2-fd}/64 | | +# +-------------------------|------+ +# | +# +-------------------------|------------------+ +# | SW1 | | +# | $ol1 + | +# | 198.51.100.1/24 | +# | 2001:db8:1::1/64 | +# | | +# | + g1 (gre) | +# | loc=192.0.2.1 | +# | rem=192.0.2.2 --. | +# | tos=inherit | | +# | v | +# | + $ul1 | +# | | 192.0.2.17/28 | +# +---------------------|----------------------+ +# | +# +---------------------|----------------------+ +# | SW2 | | +# | $ul21 + | +# | 192.0.2.18/28 | | +# | | | +# ! __________________+___ | +# | / \ | +# | | | | +# | + $ul22.111 (vlan) + $ul22.222 (vlan) | +# | | 192.0.2.33/28 | 192.0.2.49/28 | +# | | | | +# +--|----------------------|------------------+ +# | | +# +--|----------------------|------------------+ +# | | | | +# | + $ul32.111 (vlan) + $ul32.222 (vlan) | +# | | 192.0.2.34/28 | 192.0.2.50/28 | +# | | | | +# | \__________________+___/ | +# | | | +# | | | +# | $ul31 + | +# | 192.0.2.65/28 | SW3 | +# +---------------------|----------------------+ +# | +# +---------------------|----------------------+ +# | + $ul4 | +# | ^ 192.0.2.66/28 | +# | | | +# | + g2 (gre) | | +# | loc=192.0.2.2 | | +# | rem=192.0.2.1 --' | +# | tos=inherit | +# | | +# | $ol4 + | +# | 203.0.113.1/24 | | +# | 2001:db8:2::1/64 | SW4 | +# +-------------------------|------------------+ +# | +# +-------------------------|------+ +# | | | +# | $h2 + | +# | 203.0.113.{2-253}/24 | +# | 2001:db8:2::{2-fd}/64 H2 | +# +--------------------------------+ + +ALL_TESTS=" + ping_ipv4 + ping_ipv6 + custom_hash +" + +NUM_NETIFS=10 +source lib.sh + +h1_create() +{ + simple_if_init $h1 198.51.100.2/24 2001:db8:1::2/64 + ip route add vrf v$h1 default via 198.51.100.1 dev $h1 + ip -6 route add vrf v$h1 default via 2001:db8:1::1 dev $h1 +} + +h1_destroy() +{ + ip -6 route del vrf v$h1 default + ip route del vrf v$h1 default + simple_if_fini $h1 198.51.100.2/24 2001:db8:1::2/64 +} + +sw1_create() +{ + simple_if_init $ol1 198.51.100.1/24 2001:db8:1::1/64 + __simple_if_init $ul1 v$ol1 192.0.2.17/28 + + tunnel_create g1 gre 192.0.2.1 192.0.2.2 tos inherit dev v$ol1 + __simple_if_init g1 v$ol1 192.0.2.1/32 + ip route add vrf v$ol1 192.0.2.2/32 via 192.0.2.18 + + ip route add vrf v$ol1 203.0.113.0/24 dev g1 + ip -6 route add vrf v$ol1 2001:db8:2::/64 dev g1 +} + +sw1_destroy() +{ + ip -6 route del vrf v$ol1 2001:db8:2::/64 + ip route del vrf v$ol1 203.0.113.0/24 + + ip route del vrf v$ol1 192.0.2.2/32 + __simple_if_fini g1 192.0.2.1/32 + tunnel_destroy g1 + + __simple_if_fini $ul1 192.0.2.17/28 + simple_if_fini $ol1 198.51.100.1/24 2001:db8:1::1/64 +} + +sw2_create() +{ + simple_if_init $ul21 192.0.2.18/28 + __simple_if_init $ul22 v$ul21 + vlan_create $ul22 111 v$ul21 192.0.2.33/28 + vlan_create $ul22 222 v$ul21 192.0.2.49/28 + + ip route add vrf v$ul21 192.0.2.1/32 via 192.0.2.17 + ip route add vrf v$ul21 192.0.2.2/32 \ + nexthop via 192.0.2.34 \ + nexthop via 192.0.2.50 +} + +sw2_destroy() +{ + ip route del vrf v$ul21 192.0.2.2/32 + ip route del vrf v$ul21 192.0.2.1/32 + + vlan_destroy $ul22 222 + vlan_destroy $ul22 111 + __simple_if_fini $ul22 + simple_if_fini $ul21 192.0.2.18/28 +} + +sw3_create() +{ + simple_if_init $ul31 192.0.2.65/28 + __simple_if_init $ul32 v$ul31 + vlan_create $ul32 111 v$ul31 192.0.2.34/28 + vlan_create $ul32 222 v$ul31 192.0.2.50/28 + + ip route add vrf v$ul31 192.0.2.2/32 via 192.0.2.66 + ip route add vrf v$ul31 192.0.2.1/32 \ + nexthop via 192.0.2.33 \ + nexthop via 192.0.2.49 + + tc qdisc add dev $ul32 clsact + tc filter add dev $ul32 ingress pref 111 prot 802.1Q \ + flower vlan_id 111 action pass + tc filter add dev $ul32 ingress pref 222 prot 802.1Q \ + flower vlan_id 222 action pass +} + +sw3_destroy() +{ + tc qdisc del dev $ul32 clsact + + ip route del vrf v$ul31 192.0.2.1/32 + ip route del vrf v$ul31 192.0.2.2/32 + + vlan_destroy $ul32 222 + vlan_destroy $ul32 111 + __simple_if_fini $ul32 + simple_if_fini $ul31 192.0.2.65/28 +} + +sw4_create() +{ + simple_if_init $ol4 203.0.113.1/24 2001:db8:2::1/64 + __simple_if_init $ul4 v$ol4 192.0.2.66/28 + + tunnel_create g2 gre 192.0.2.2 192.0.2.1 tos inherit dev v$ol4 + __simple_if_init g2 v$ol4 192.0.2.2/32 + ip route add vrf v$ol4 192.0.2.1/32 via 192.0.2.65 + + ip route add vrf v$ol4 198.51.100.0/24 dev g2 + ip -6 route add vrf v$ol4 2001:db8:1::/64 dev g2 +} + +sw4_destroy() +{ + ip -6 route del vrf v$ol4 2001:db8:1::/64 + ip route del vrf v$ol4 198.51.100.0/24 + + ip route del vrf v$ol4 192.0.2.1/32 + __simple_if_fini g2 192.0.2.2/32 + tunnel_destroy g2 + + __simple_if_fini $ul4 192.0.2.66/28 + simple_if_fini $ol4 203.0.113.1/24 2001:db8:2::1/64 +} + +h2_create() +{ + simple_if_init $h2 203.0.113.2/24 2001:db8:2::2/64 + ip route add vrf v$h2 default via 203.0.113.1 dev $h2 + ip -6 route add vrf v$h2 default via 2001:db8:2::1 dev $h2 +} + +h2_destroy() +{ + ip -6 route del vrf v$h2 default + ip route del vrf v$h2 default + simple_if_fini $h2 203.0.113.2/24 2001:db8:2::2/64 +} + +setup_prepare() +{ + h1=${NETIFS[p1]} + + ol1=${NETIFS[p2]} + ul1=${NETIFS[p3]} + + ul21=${NETIFS[p4]} + ul22=${NETIFS[p5]} + + ul32=${NETIFS[p6]} + ul31=${NETIFS[p7]} + + ul4=${NETIFS[p8]} + ol4=${NETIFS[p9]} + + h2=${NETIFS[p10]} + + vrf_prepare + h1_create + sw1_create + sw2_create + sw3_create + sw4_create + h2_create + + forwarding_enable +} + +cleanup() +{ + pre_cleanup + + forwarding_restore + + h2_destroy + sw4_destroy + sw3_destroy + sw2_destroy + sw1_destroy + h1_destroy + vrf_cleanup +} + +ping_ipv4() +{ + ping_test $h1 203.0.113.2 +} + +ping_ipv6() +{ + ping6_test $h1 2001:db8:2::2 +} + +send_src_ipv4() +{ + $MZ $h1 -q -p 64 -A "198.51.100.2-198.51.100.253" -B 203.0.113.2 \ + -d 1msec -c 50 -t udp "sp=20000,dp=30000" +} + +send_dst_ipv4() +{ + $MZ $h1 -q -p 64 -A 198.51.100.2 -B "203.0.113.2-203.0.113.253" \ + -d 1msec -c 50 -t udp "sp=20000,dp=30000" +} + +send_src_udp4() +{ + $MZ $h1 -q -p 64 -A 198.51.100.2 -B 203.0.113.2 \ + -d 1msec -t udp "sp=0-32768,dp=30000" +} + +send_dst_udp4() +{ + $MZ $h1 -q -p 64 -A 198.51.100.2 -B 203.0.113.2 \ + -d 1msec -t udp "sp=20000,dp=0-32768" +} + +send_src_ipv6() +{ + $MZ -6 $h1 -q -p 64 -A "2001:db8:1::2-2001:db8:1::fd" -B 2001:db8:2::2 \ + -d 1msec -c 50 -t udp "sp=20000,dp=30000" +} + +send_dst_ipv6() +{ + $MZ -6 $h1 -q -p 64 -A 2001:db8:1::2 -B "2001:db8:2::2-2001:db8:2::fd" \ + -d 1msec -c 50 -t udp "sp=20000,dp=30000" +} + +send_flowlabel() +{ + # Generate 16384 echo requests, each with a random flow label. + for _ in $(seq 1 16384); do + ip vrf exec v$h1 \ + $PING6 2001:db8:2::2 -F 0 -c 1 -q >/dev/null 2>&1 + done +} + +send_src_udp6() +{ + $MZ -6 $h1 -q -p 64 -A 2001:db8:1::2 -B 2001:db8:2::2 \ + -d 1msec -t udp "sp=0-32768,dp=30000" +} + +send_dst_udp6() +{ + $MZ -6 $h1 -q -p 64 -A 2001:db8:1::2 -B 2001:db8:2::2 \ + -d 1msec -t udp "sp=20000,dp=0-32768" +} + +custom_hash_test() +{ + local field="$1"; shift + local balanced="$1"; shift + local send_flows="$@" + + RET=0 + + local t0_111=$(tc_rule_stats_get $ul32 111 ingress) + local t0_222=$(tc_rule_stats_get $ul32 222 ingress) + + $send_flows + + local t1_111=$(tc_rule_stats_get $ul32 111 ingress) + local t1_222=$(tc_rule_stats_get $ul32 222 ingress) + + local d111=$((t1_111 - t0_111)) + local d222=$((t1_222 - t0_222)) + + local diff=$((d222 - d111)) + local sum=$((d111 + d222)) + + local pct=$(echo "$diff / $sum * 100" | bc -l) + local is_balanced=$(echo "-20 <= $pct && $pct <= 20" | bc) + + [[ ( $is_balanced -eq 1 && $balanced == "balanced" ) || + ( $is_balanced -eq 0 && $balanced == "unbalanced" ) ]] + check_err $? "Expected traffic to be $balanced, but it is not" + + log_test "Multipath hash field: $field ($balanced)" + log_info "Packets sent on path1 / path2: $d111 / $d222" +} + +custom_hash_v4() +{ + log_info "Running IPv4 overlay custom multipath hash tests" + + # Prevent the neighbour table from overflowing, as different neighbour + # entries will be created on $ol4 when using different destination IPs. + sysctl_set net.ipv4.neigh.default.gc_thresh1 1024 + sysctl_set net.ipv4.neigh.default.gc_thresh2 1024 + sysctl_set net.ipv4.neigh.default.gc_thresh3 1024 + + sysctl_set net.ipv4.fib_multipath_hash_fields 0x0040 + custom_hash_test "Inner source IP" "balanced" send_src_ipv4 + custom_hash_test "Inner source IP" "unbalanced" send_dst_ipv4 + + sysctl_set net.ipv4.fib_multipath_hash_fields 0x0080 + custom_hash_test "Inner destination IP" "balanced" send_dst_ipv4 + custom_hash_test "Inner destination IP" "unbalanced" send_src_ipv4 + + sysctl_set net.ipv4.fib_multipath_hash_fields 0x0400 + custom_hash_test "Inner source port" "balanced" send_src_udp4 + custom_hash_test "Inner source port" "unbalanced" send_dst_udp4 + + sysctl_set net.ipv4.fib_multipath_hash_fields 0x0800 + custom_hash_test "Inner destination port" "balanced" send_dst_udp4 + custom_hash_test "Inner destination port" "unbalanced" send_src_udp4 + + sysctl_restore net.ipv4.neigh.default.gc_thresh3 + sysctl_restore net.ipv4.neigh.default.gc_thresh2 + sysctl_restore net.ipv4.neigh.default.gc_thresh1 +} + +custom_hash_v6() +{ + log_info "Running IPv6 overlay custom multipath hash tests" + + # Prevent the neighbour table from overflowing, as different neighbour + # entries will be created on $ol4 when using different destination IPs. + sysctl_set net.ipv6.neigh.default.gc_thresh1 1024 + sysctl_set net.ipv6.neigh.default.gc_thresh2 1024 + sysctl_set net.ipv6.neigh.default.gc_thresh3 1024 + + sysctl_set net.ipv4.fib_multipath_hash_fields 0x0040 + custom_hash_test "Inner source IP" "balanced" send_src_ipv6 + custom_hash_test "Inner source IP" "unbalanced" send_dst_ipv6 + + sysctl_set net.ipv4.fib_multipath_hash_fields 0x0080 + custom_hash_test "Inner destination IP" "balanced" send_dst_ipv6 + custom_hash_test "Inner destination IP" "unbalanced" send_src_ipv6 + + sysctl_set net.ipv4.fib_multipath_hash_fields 0x0200 + custom_hash_test "Inner flowlabel" "balanced" send_flowlabel + custom_hash_test "Inner flowlabel" "unbalanced" send_src_ipv6 + + sysctl_set net.ipv4.fib_multipath_hash_fields 0x0400 + custom_hash_test "Inner source port" "balanced" send_src_udp6 + custom_hash_test "Inner source port" "unbalanced" send_dst_udp6 + + sysctl_set net.ipv4.fib_multipath_hash_fields 0x0800 + custom_hash_test "Inner destination port" "balanced" send_dst_udp6 + custom_hash_test "Inner destination port" "unbalanced" send_src_udp6 + + sysctl_restore net.ipv6.neigh.default.gc_thresh3 + sysctl_restore net.ipv6.neigh.default.gc_thresh2 + sysctl_restore net.ipv6.neigh.default.gc_thresh1 +} + +custom_hash() +{ + # Test that when the hash policy is set to custom, traffic is + # distributed only according to the fields set in the + # fib_multipath_hash_fields sysctl. + # + # Each time set a different field and make sure traffic is only + # distributed when the field is changed in the packet stream. + + sysctl_set net.ipv4.fib_multipath_hash_policy 3 + + custom_hash_v4 + custom_hash_v6 + + sysctl_restore net.ipv4.fib_multipath_hash_policy +} + +trap cleanup EXIT + +setup_prepare +setup_wait +tests_run + +exit $EXIT_STATUS diff --git a/tools/testing/selftests/net/forwarding/ip6gre_custom_multipath_hash.sh b/tools/testing/selftests/net/forwarding/ip6gre_custom_multipath_hash.sh new file mode 100755 index 000000000000..8fea2c2e0b25 --- /dev/null +++ b/tools/testing/selftests/net/forwarding/ip6gre_custom_multipath_hash.sh @@ -0,0 +1,458 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 +# +# Test traffic distribution when there are multiple paths between an IPv6 GRE +# tunnel. The tunnel carries IPv4 and IPv6 traffic between multiple hosts. +# Multiple routes are in the underlay network. With the default multipath +# policy, SW2 will only look at the outer IP addresses, hence only a single +# route would be used. +# +# +--------------------------------+ +# | H1 | +# | $h1 + | +# | 198.51.100.{2-253}/24 | | +# | 2001:db8:1::{2-fd}/64 | | +# +-------------------------|------+ +# | +# +-------------------------|-------------------+ +# | SW1 | | +# | $ol1 + | +# | 198.51.100.1/24 | +# | 2001:db8:1::1/64 | +# | | +# |+ g1 (ip6gre) | +# | loc=2001:db8:3::1 | +# | rem=2001:db8:3::2 -. | +# | tos=inherit | | +# | v | +# | + $ul1 | +# | | 2001:db8:10::1/64 | +# +---------------------|-----------------------+ +# | +# +---------------------|-----------------------+ +# | SW2 | | +# | $ul21 + | +# | 2001:db8:10::2/64 | | +# | | | +# ! __________________+___ | +# | / \ | +# | | | | +# | + $ul22.111 (vlan) + $ul22.222 (vlan) | +# | | 2001:db8:11::1/64 | 2001:db8:12::1/64 | +# | | | | +# +--|----------------------|-------------------+ +# | | +# +--|----------------------|-------------------+ +# | | | | +# | + $ul32.111 (vlan) + $ul32.222 (vlan) | +# | | 2001:db8:11::2/64 | 2001:db8:12::2/64 | +# | | | | +# | \__________________+___/ | +# | | | +# | | | +# | $ul31 + | +# | 2001:db8:13::1/64 | SW3 | +# +---------------------|-----------------------+ +# | +# +---------------------|-----------------------+ +# | + $ul4 | +# | ^ 2001:db8:13::2/64 | +# | | | +# |+ g2 (ip6gre) | | +# | loc=2001:db8:3::2 | | +# | rem=2001:db8:3::1 -' | +# | tos=inherit | +# | | +# | $ol4 + | +# | 203.0.113.1/24 | | +# | 2001:db8:2::1/64 | SW4 | +# +-------------------------|-------------------+ +# | +# +-------------------------|------+ +# | | | +# | $h2 + | +# | 203.0.113.{2-253}/24 | +# | 2001:db8:2::{2-fd}/64 H2 | +# +--------------------------------+ + +ALL_TESTS=" + ping_ipv4 + ping_ipv6 + custom_hash +" + +NUM_NETIFS=10 +source lib.sh + +h1_create() +{ + simple_if_init $h1 198.51.100.2/24 2001:db8:1::2/64 + ip route add vrf v$h1 default via 198.51.100.1 dev $h1 + ip -6 route add vrf v$h1 default via 2001:db8:1::1 dev $h1 +} + +h1_destroy() +{ + ip -6 route del vrf v$h1 default + ip route del vrf v$h1 default + simple_if_fini $h1 198.51.100.2/24 2001:db8:1::2/64 +} + +sw1_create() +{ + simple_if_init $ol1 198.51.100.1/24 2001:db8:1::1/64 + __simple_if_init $ul1 v$ol1 2001:db8:10::1/64 + + tunnel_create g1 ip6gre 2001:db8:3::1 2001:db8:3::2 tos inherit \ + dev v$ol1 + __simple_if_init g1 v$ol1 2001:db8:3::1/128 + ip route add vrf v$ol1 2001:db8:3::2/128 via 2001:db8:10::2 + + ip route add vrf v$ol1 203.0.113.0/24 dev g1 + ip -6 route add vrf v$ol1 2001:db8:2::/64 dev g1 +} + +sw1_destroy() +{ + ip -6 route del vrf v$ol1 2001:db8:2::/64 + ip route del vrf v$ol1 203.0.113.0/24 + + ip route del vrf v$ol1 2001:db8:3::2/128 + __simple_if_fini g1 2001:db8:3::1/128 + tunnel_destroy g1 + + __simple_if_fini $ul1 2001:db8:10::1/64 + simple_if_fini $ol1 198.51.100.1/24 2001:db8:1::1/64 +} + +sw2_create() +{ + simple_if_init $ul21 2001:db8:10::2/64 + __simple_if_init $ul22 v$ul21 + vlan_create $ul22 111 v$ul21 2001:db8:11::1/64 + vlan_create $ul22 222 v$ul21 2001:db8:12::1/64 + + ip -6 route add vrf v$ul21 2001:db8:3::1/128 via 2001:db8:10::1 + ip -6 route add vrf v$ul21 2001:db8:3::2/128 \ + nexthop via 2001:db8:11::2 \ + nexthop via 2001:db8:12::2 +} + +sw2_destroy() +{ + ip -6 route del vrf v$ul21 2001:db8:3::2/128 + ip -6 route del vrf v$ul21 2001:db8:3::1/128 + + vlan_destroy $ul22 222 + vlan_destroy $ul22 111 + __simple_if_fini $ul22 + simple_if_fini $ul21 2001:db8:10::2/64 +} + +sw3_create() +{ + simple_if_init $ul31 2001:db8:13::1/64 + __simple_if_init $ul32 v$ul31 + vlan_create $ul32 111 v$ul31 2001:db8:11::2/64 + vlan_create $ul32 222 v$ul31 2001:db8:12::2/64 + + ip -6 route add vrf v$ul31 2001:db8:3::2/128 via 2001:db8:13::2 + ip -6 route add vrf v$ul31 2001:db8:3::1/128 \ + nexthop via 2001:db8:11::1 \ + nexthop via 2001:db8:12::1 + + tc qdisc add dev $ul32 clsact + tc filter add dev $ul32 ingress pref 111 prot 802.1Q \ + flower vlan_id 111 action pass + tc filter add dev $ul32 ingress pref 222 prot 802.1Q \ + flower vlan_id 222 action pass +} + +sw3_destroy() +{ + tc qdisc del dev $ul32 clsact + + ip -6 route del vrf v$ul31 2001:db8:3::1/128 + ip -6 route del vrf v$ul31 2001:db8:3::2/128 + + vlan_destroy $ul32 222 + vlan_destroy $ul32 111 + __simple_if_fini $ul32 + simple_if_fini $ul31 2001:db8:13::1/64 +} + +sw4_create() +{ + simple_if_init $ol4 203.0.113.1/24 2001:db8:2::1/64 + __simple_if_init $ul4 v$ol4 2001:db8:13::2/64 + + tunnel_create g2 ip6gre 2001:db8:3::2 2001:db8:3::1 tos inherit \ + dev v$ol4 + __simple_if_init g2 v$ol4 2001:db8:3::2/128 + ip -6 route add vrf v$ol4 2001:db8:3::1/128 via 2001:db8:13::1 + + ip route add vrf v$ol4 198.51.100.0/24 dev g2 + ip -6 route add vrf v$ol4 2001:db8:1::/64 dev g2 +} + +sw4_destroy() +{ + ip -6 route del vrf v$ol4 2001:db8:1::/64 + ip route del vrf v$ol4 198.51.100.0/24 + + ip -6 route del vrf v$ol4 2001:db8:3::1/128 + __simple_if_fini g2 2001:db8:3::2/128 + tunnel_destroy g2 + + __simple_if_fini $ul4 2001:db8:13::2/64 + simple_if_fini $ol4 203.0.113.1/24 2001:db8:2::1/64 +} + +h2_create() +{ + simple_if_init $h2 203.0.113.2/24 2001:db8:2::2/64 + ip route add vrf v$h2 default via 203.0.113.1 dev $h2 + ip -6 route add vrf v$h2 default via 2001:db8:2::1 dev $h2 +} + +h2_destroy() +{ + ip -6 route del vrf v$h2 default + ip route del vrf v$h2 default + simple_if_fini $h2 203.0.113.2/24 2001:db8:2::2/64 +} + +setup_prepare() +{ + h1=${NETIFS[p1]} + + ol1=${NETIFS[p2]} + ul1=${NETIFS[p3]} + + ul21=${NETIFS[p4]} + ul22=${NETIFS[p5]} + + ul32=${NETIFS[p6]} + ul31=${NETIFS[p7]} + + ul4=${NETIFS[p8]} + ol4=${NETIFS[p9]} + + h2=${NETIFS[p10]} + + vrf_prepare + h1_create + sw1_create + sw2_create + sw3_create + sw4_create + h2_create + + forwarding_enable +} + +cleanup() +{ + pre_cleanup + + forwarding_restore + + h2_destroy + sw4_destroy + sw3_destroy + sw2_destroy + sw1_destroy + h1_destroy + vrf_cleanup +} + +ping_ipv4() +{ + ping_test $h1 203.0.113.2 +} + +ping_ipv6() +{ + ping6_test $h1 2001:db8:2::2 +} + +send_src_ipv4() +{ + $MZ $h1 -q -p 64 -A "198.51.100.2-198.51.100.253" -B 203.0.113.2 \ + -d 1msec -c 50 -t udp "sp=20000,dp=30000" +} + +send_dst_ipv4() +{ + $MZ $h1 -q -p 64 -A 198.51.100.2 -B "203.0.113.2-203.0.113.253" \ + -d 1msec -c 50 -t udp "sp=20000,dp=30000" +} + +send_src_udp4() +{ + $MZ $h1 -q -p 64 -A 198.51.100.2 -B 203.0.113.2 \ + -d 1msec -t udp "sp=0-32768,dp=30000" +} + +send_dst_udp4() +{ + $MZ $h1 -q -p 64 -A 198.51.100.2 -B 203.0.113.2 \ + -d 1msec -t udp "sp=20000,dp=0-32768" +} + +send_src_ipv6() +{ + $MZ -6 $h1 -q -p 64 -A "2001:db8:1::2-2001:db8:1::fd" -B 2001:db8:2::2 \ + -d 1msec -c 50 -t udp "sp=20000,dp=30000" +} + +send_dst_ipv6() +{ + $MZ -6 $h1 -q -p 64 -A 2001:db8:1::2 -B "2001:db8:2::2-2001:db8:2::fd" \ + -d 1msec -c 50 -t udp "sp=20000,dp=30000" +} + +send_flowlabel() +{ + # Generate 16384 echo requests, each with a random flow label. + for _ in $(seq 1 16384); do + ip vrf exec v$h1 \ + $PING6 2001:db8:2::2 -F 0 -c 1 -q >/dev/null 2>&1 + done +} + +send_src_udp6() +{ + $MZ -6 $h1 -q -p 64 -A 2001:db8:1::2 -B 2001:db8:2::2 \ + -d 1msec -t udp "sp=0-32768,dp=30000" +} + +send_dst_udp6() +{ + $MZ -6 $h1 -q -p 64 -A 2001:db8:1::2 -B 2001:db8:2::2 \ + -d 1msec -t udp "sp=20000,dp=0-32768" +} + +custom_hash_test() +{ + local field="$1"; shift + local balanced="$1"; shift + local send_flows="$@" + + RET=0 + + local t0_111=$(tc_rule_stats_get $ul32 111 ingress) + local t0_222=$(tc_rule_stats_get $ul32 222 ingress) + + $send_flows + + local t1_111=$(tc_rule_stats_get $ul32 111 ingress) + local t1_222=$(tc_rule_stats_get $ul32 222 ingress) + + local d111=$((t1_111 - t0_111)) + local d222=$((t1_222 - t0_222)) + + local diff=$((d222 - d111)) + local sum=$((d111 + d222)) + + local pct=$(echo "$diff / $sum * 100" | bc -l) + local is_balanced=$(echo "-20 <= $pct && $pct <= 20" | bc) + + [[ ( $is_balanced -eq 1 && $balanced == "balanced" ) || + ( $is_balanced -eq 0 && $balanced == "unbalanced" ) ]] + check_err $? "Expected traffic to be $balanced, but it is not" + + log_test "Multipath hash field: $field ($balanced)" + log_info "Packets sent on path1 / path2: $d111 / $d222" +} + +custom_hash_v4() +{ + log_info "Running IPv4 overlay custom multipath hash tests" + + # Prevent the neighbour table from overflowing, as different neighbour + # entries will be created on $ol4 when using different destination IPs. + sysctl_set net.ipv4.neigh.default.gc_thresh1 1024 + sysctl_set net.ipv4.neigh.default.gc_thresh2 1024 + sysctl_set net.ipv4.neigh.default.gc_thresh3 1024 + + sysctl_set net.ipv6.fib_multipath_hash_fields 0x0040 + custom_hash_test "Inner source IP" "balanced" send_src_ipv4 + custom_hash_test "Inner source IP" "unbalanced" send_dst_ipv4 + + sysctl_set net.ipv6.fib_multipath_hash_fields 0x0080 + custom_hash_test "Inner destination IP" "balanced" send_dst_ipv4 + custom_hash_test "Inner destination IP" "unbalanced" send_src_ipv4 + + sysctl_set net.ipv6.fib_multipath_hash_fields 0x0400 + custom_hash_test "Inner source port" "balanced" send_src_udp4 + custom_hash_test "Inner source port" "unbalanced" send_dst_udp4 + + sysctl_set net.ipv6.fib_multipath_hash_fields 0x0800 + custom_hash_test "Inner destination port" "balanced" send_dst_udp4 + custom_hash_test "Inner destination port" "unbalanced" send_src_udp4 + + sysctl_restore net.ipv4.neigh.default.gc_thresh3 + sysctl_restore net.ipv4.neigh.default.gc_thresh2 + sysctl_restore net.ipv4.neigh.default.gc_thresh1 +} + +custom_hash_v6() +{ + log_info "Running IPv6 overlay custom multipath hash tests" + + # Prevent the neighbour table from overflowing, as different neighbour + # entries will be created on $ol4 when using different destination IPs. + sysctl_set net.ipv6.neigh.default.gc_thresh1 1024 + sysctl_set net.ipv6.neigh.default.gc_thresh2 1024 + sysctl_set net.ipv6.neigh.default.gc_thresh3 1024 + + sysctl_set net.ipv6.fib_multipath_hash_fields 0x0040 + custom_hash_test "Inner source IP" "balanced" send_src_ipv6 + custom_hash_test "Inner source IP" "unbalanced" send_dst_ipv6 + + sysctl_set net.ipv6.fib_multipath_hash_fields 0x0080 + custom_hash_test "Inner destination IP" "balanced" send_dst_ipv6 + custom_hash_test "Inner destination IP" "unbalanced" send_src_ipv6 + + sysctl_set net.ipv6.fib_multipath_hash_fields 0x0200 + custom_hash_test "Inner flowlabel" "balanced" send_flowlabel + custom_hash_test "Inner flowlabel" "unbalanced" send_src_ipv6 + + sysctl_set net.ipv6.fib_multipath_hash_fields 0x0400 + custom_hash_test "Inner source port" "balanced" send_src_udp6 + custom_hash_test "Inner source port" "unbalanced" send_dst_udp6 + + sysctl_set net.ipv6.fib_multipath_hash_fields 0x0800 + custom_hash_test "Inner destination port" "balanced" send_dst_udp6 + custom_hash_test "Inner destination port" "unbalanced" send_src_udp6 + + sysctl_restore net.ipv6.neigh.default.gc_thresh3 + sysctl_restore net.ipv6.neigh.default.gc_thresh2 + sysctl_restore net.ipv6.neigh.default.gc_thresh1 +} + +custom_hash() +{ + # Test that when the hash policy is set to custom, traffic is + # distributed only according to the fields set in the + # fib_multipath_hash_fields sysctl. + # + # Each time set a different field and make sure traffic is only + # distributed when the field is changed in the packet stream. + + sysctl_set net.ipv6.fib_multipath_hash_policy 3 + + custom_hash_v4 + custom_hash_v6 + + sysctl_restore net.ipv6.fib_multipath_hash_policy +} + +trap cleanup EXIT + +setup_prepare +setup_wait +tests_run + +exit $EXIT_STATUS diff --git a/tools/testing/selftests/net/forwarding/pedit_dsfield.sh b/tools/testing/selftests/net/forwarding/pedit_dsfield.sh index 55eeacf59241..64fbd211d907 100755 --- a/tools/testing/selftests/net/forwarding/pedit_dsfield.sh +++ b/tools/testing/selftests/net/forwarding/pedit_dsfield.sh @@ -75,7 +75,9 @@ switch_destroy() tc qdisc del dev $swp2 clsact tc qdisc del dev $swp1 clsact + ip link set dev $swp2 down ip link set dev $swp2 nomaster + ip link set dev $swp1 down ip link set dev $swp1 nomaster ip link del dev br1 } diff --git a/tools/testing/selftests/net/forwarding/pedit_l4port.sh b/tools/testing/selftests/net/forwarding/pedit_l4port.sh index 5f20d289ee43..10e594c55117 100755 --- a/tools/testing/selftests/net/forwarding/pedit_l4port.sh +++ b/tools/testing/selftests/net/forwarding/pedit_l4port.sh @@ -71,7 +71,9 @@ switch_destroy() tc qdisc del dev $swp2 clsact tc qdisc del dev $swp1 clsact + ip link set dev $swp2 down ip link set dev $swp2 nomaster + ip link set dev $swp1 down ip link set dev $swp1 nomaster ip link del dev br1 } diff --git a/tools/testing/selftests/net/forwarding/skbedit_priority.sh b/tools/testing/selftests/net/forwarding/skbedit_priority.sh index e3bd8a6bb8b4..bde11dc27873 100755 --- a/tools/testing/selftests/net/forwarding/skbedit_priority.sh +++ b/tools/testing/selftests/net/forwarding/skbedit_priority.sh @@ -72,7 +72,9 @@ switch_destroy() tc qdisc del dev $swp2 clsact tc qdisc del dev $swp1 clsact + ip link set dev $swp2 down ip link set dev $swp2 nomaster + ip link set dev $swp1 down ip link set dev $swp1 nomaster ip link del dev br1 } diff --git a/tools/testing/selftests/net/icmp.sh b/tools/testing/selftests/net/icmp.sh new file mode 100755 index 000000000000..e4b04cd1644a --- /dev/null +++ b/tools/testing/selftests/net/icmp.sh @@ -0,0 +1,74 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +# Test for checking ICMP response with dummy address instead of 0.0.0.0. +# Sets up two namespaces like: +# +----------------------+ +--------------------+ +# | ns1 | v4-via-v6 routes: | ns2 | +# | | ' | | +# | +--------+ -> 172.16.1.0/24 -> +--------+ | +# | | veth0 +--------------------------+ veth0 | | +# | +--------+ <- 172.16.0.0/24 <- +--------+ | +# | 172.16.0.1 | | 2001:db8:1::2/64 | +# | 2001:db8:1::2/64 | | | +# +----------------------+ +--------------------+ +# +# And then tries to ping 172.16.1.1 from ns1. This results in a "net +# unreachable" message being sent from ns2, but there is no IPv4 address set in +# that address space, so the kernel should substitute the dummy address +# 192.0.0.8 defined in RFC7600. + +NS1=ns1 +NS2=ns2 +H1_IP=172.16.0.1/32 +H1_IP6=2001:db8:1::1 +RT1=172.16.1.0/24 +PINGADDR=172.16.1.1 +RT2=172.16.0.0/24 +H2_IP6=2001:db8:1::2 + +TMPFILE=$(mktemp) + +cleanup() +{ + rm -f "$TMPFILE" + ip netns del $NS1 + ip netns del $NS2 +} + +trap cleanup EXIT + +# Namespaces +ip netns add $NS1 +ip netns add $NS2 + +# Connectivity +ip -netns $NS1 link add veth0 type veth peer name veth0 netns $NS2 +ip -netns $NS1 link set dev veth0 up +ip -netns $NS2 link set dev veth0 up +ip -netns $NS1 addr add $H1_IP dev veth0 +ip -netns $NS1 addr add $H1_IP6/64 dev veth0 nodad +ip -netns $NS2 addr add $H2_IP6/64 dev veth0 nodad +ip -netns $NS1 route add $RT1 via inet6 $H2_IP6 +ip -netns $NS2 route add $RT2 via inet6 $H1_IP6 + +# Make sure ns2 will respond with ICMP unreachable +ip netns exec $NS2 sysctl -qw net.ipv4.icmp_ratelimit=0 net.ipv4.ip_forward=1 + +# Run the test - a ping runs in the background, and we capture ICMP responses +# with tcpdump; -c 1 means it should exit on the first ping, but add a timeout +# in case something goes wrong +ip netns exec $NS1 ping -w 3 -i 0.5 $PINGADDR >/dev/null & +ip netns exec $NS1 timeout 10 tcpdump -tpni veth0 -c 1 'icmp and icmp[icmptype] != icmp-echo' > $TMPFILE 2>/dev/null + +# Parse response and check for dummy address +# tcpdump output looks like: +# IP 192.0.0.8 > 172.16.0.1: ICMP net 172.16.1.1 unreachable, length 92 +RESP_IP=$(awk '{print $2}' < $TMPFILE) +if [[ "$RESP_IP" != "192.0.0.8" ]]; then + echo "FAIL - got ICMP response from $RESP_IP, should be 192.0.0.8" + exit 1 +else + echo "OK" + exit 0 +fi diff --git a/tools/testing/selftests/net/icmp_redirect.sh b/tools/testing/selftests/net/icmp_redirect.sh index bf361f30d6ef..c19ecc6a8614 100755 --- a/tools/testing/selftests/net/icmp_redirect.sh +++ b/tools/testing/selftests/net/icmp_redirect.sh @@ -63,10 +63,14 @@ log_test() local rc=$1 local expected=$2 local msg="$3" + local xfail=$4 if [ ${rc} -eq ${expected} ]; then printf "TEST: %-60s [ OK ]\n" "${msg}" nsuccess=$((nsuccess+1)) + elif [ ${rc} -eq ${xfail} ]; then + printf "TEST: %-60s [XFAIL]\n" "${msg}" + nxfail=$((nxfail+1)) else ret=1 nfail=$((nfail+1)) @@ -322,7 +326,7 @@ check_exception() ip -netns h1 -6 ro get ${H1_VRF_ARG} ${H2_N2_IP6} | \ grep -v "mtu" | grep -q "${R1_LLADDR}" fi - log_test $? 0 "IPv6: ${desc}" + log_test $? 0 "IPv6: ${desc}" 1 } run_ping() @@ -488,6 +492,7 @@ which ping6 > /dev/null 2>&1 && ping6=$(which ping6) || ping6=$(which ping) ret=0 nsuccess=0 nfail=0 +nxfail=0 while getopts :pv o do @@ -532,5 +537,6 @@ fi printf "\nTests passed: %3d\n" ${nsuccess} printf "Tests failed: %3d\n" ${nfail} +printf "Tests xfailed: %3d\n" ${nxfail} exit $ret diff --git a/tools/testing/selftests/net/mptcp/mptcp_connect.c b/tools/testing/selftests/net/mptcp/mptcp_connect.c index d88e1fdfb147..89c4753c2760 100644 --- a/tools/testing/selftests/net/mptcp/mptcp_connect.c +++ b/tools/testing/selftests/net/mptcp/mptcp_connect.c @@ -6,6 +6,7 @@ #include <limits.h> #include <fcntl.h> #include <string.h> +#include <stdarg.h> #include <stdbool.h> #include <stdint.h> #include <stdio.h> @@ -25,6 +26,7 @@ #include <netinet/in.h> #include <linux/tcp.h> +#include <linux/time_types.h> extern int optind; @@ -66,6 +68,13 @@ static unsigned int cfg_do_w; static int cfg_wait; static uint32_t cfg_mark; +struct cfg_cmsg_types { + unsigned int cmsg_enabled:1; + unsigned int timestampns:1; +}; + +static struct cfg_cmsg_types cfg_cmsg_types; + static void die_usage(void) { fprintf(stderr, "Usage: mptcp_connect [-6] [-u] [-s MPTCP|TCP] [-p port] [-m mode]" @@ -80,11 +89,22 @@ static void die_usage(void) fprintf(stderr, "\t-M mark -- set socket packet mark\n"); fprintf(stderr, "\t-u -- check mptcp ulp\n"); fprintf(stderr, "\t-w num -- wait num sec before closing the socket\n"); + fprintf(stderr, "\t-c cmsg -- test cmsg type <cmsg>\n"); fprintf(stderr, "\t-P [saveWithPeek|saveAfterPeek] -- save data with/after MSG_PEEK form tcp socket\n"); exit(1); } +static void xerror(const char *fmt, ...) +{ + va_list ap; + + va_start(ap, fmt); + vfprintf(stderr, fmt, ap); + va_end(ap); + exit(1); +} + static void handle_signal(int nr) { quit = true; @@ -338,6 +358,58 @@ static size_t do_write(const int fd, char *buf, const size_t len) return offset; } +static void process_cmsg(struct msghdr *msgh) +{ + struct __kernel_timespec ts; + bool ts_found = false; + struct cmsghdr *cmsg; + + for (cmsg = CMSG_FIRSTHDR(msgh); cmsg ; cmsg = CMSG_NXTHDR(msgh, cmsg)) { + if (cmsg->cmsg_level == SOL_SOCKET && cmsg->cmsg_type == SO_TIMESTAMPNS_NEW) { + memcpy(&ts, CMSG_DATA(cmsg), sizeof(ts)); + ts_found = true; + continue; + } + } + + if (cfg_cmsg_types.timestampns) { + if (!ts_found) + xerror("TIMESTAMPNS not present\n"); + } +} + +static ssize_t do_recvmsg_cmsg(const int fd, char *buf, const size_t len) +{ + char msg_buf[8192]; + struct iovec iov = { + .iov_base = buf, + .iov_len = len, + }; + struct msghdr msg = { + .msg_iov = &iov, + .msg_iovlen = 1, + .msg_control = msg_buf, + .msg_controllen = sizeof(msg_buf), + }; + int flags = 0; + int ret = recvmsg(fd, &msg, flags); + + if (ret <= 0) + return ret; + + if (msg.msg_controllen && !cfg_cmsg_types.cmsg_enabled) + xerror("got %lu bytes of cmsg data, expected 0\n", + (unsigned long)msg.msg_controllen); + + if (msg.msg_controllen == 0 && cfg_cmsg_types.cmsg_enabled) + xerror("%s\n", "got no cmsg data"); + + if (msg.msg_controllen) + process_cmsg(&msg); + + return ret; +} + static ssize_t do_rnd_read(const int fd, char *buf, const size_t len) { int ret = 0; @@ -357,6 +429,8 @@ static ssize_t do_rnd_read(const int fd, char *buf, const size_t len) } else if (cfg_peek == CFG_AFTER_PEEK) { ret = recv(fd, buf, cap, MSG_PEEK); ret = (ret < 0) ? ret : read(fd, buf, cap); + } else if (cfg_cmsg_types.cmsg_enabled) { + ret = do_recvmsg_cmsg(fd, buf, cap); } else { ret = read(fd, buf, cap); } @@ -786,6 +860,48 @@ static void init_rng(void) srand(foo); } +static void xsetsockopt(int fd, int level, int optname, const void *optval, socklen_t optlen) +{ + int err; + + err = setsockopt(fd, level, optname, optval, optlen); + if (err) { + perror("setsockopt"); + exit(1); + } +} + +static void apply_cmsg_types(int fd, const struct cfg_cmsg_types *cmsg) +{ + static const unsigned int on = 1; + + if (cmsg->timestampns) + xsetsockopt(fd, SOL_SOCKET, SO_TIMESTAMPNS_NEW, &on, sizeof(on)); +} + +static void parse_cmsg_types(const char *type) +{ + char *next = strchr(type, ','); + unsigned int len = 0; + + cfg_cmsg_types.cmsg_enabled = 1; + + if (next) { + parse_cmsg_types(next + 1); + len = next - type; + } else { + len = strlen(type); + } + + if (strncmp(type, "TIMESTAMPNS", len) == 0) { + cfg_cmsg_types.timestampns = 1; + return; + } + + fprintf(stderr, "Unrecognized cmsg option %s\n", type); + exit(1); +} + int main_loop(void) { int fd; @@ -801,6 +917,8 @@ int main_loop(void) set_rcvbuf(fd, cfg_rcvbuf); if (cfg_sndbuf) set_sndbuf(fd, cfg_sndbuf); + if (cfg_cmsg_types.cmsg_enabled) + apply_cmsg_types(fd, &cfg_cmsg_types); return copyfd_io(0, fd, 1); } @@ -887,7 +1005,7 @@ static void parse_opts(int argc, char **argv) { int c; - while ((c = getopt(argc, argv, "6jr:lp:s:hut:m:S:R:w:M:P:")) != -1) { + while ((c = getopt(argc, argv, "6jr:lp:s:hut:m:S:R:w:M:P:c:")) != -1) { switch (c) { case 'j': cfg_join = true; @@ -943,6 +1061,9 @@ static void parse_opts(int argc, char **argv) case 'P': cfg_peek = parse_peek(optarg); break; + case 'c': + parse_cmsg_types(optarg); + break; } } @@ -976,6 +1097,8 @@ int main(int argc, char *argv[]) set_sndbuf(fd, cfg_sndbuf); if (cfg_mark) set_mark(fd, cfg_mark); + if (cfg_cmsg_types.cmsg_enabled) + apply_cmsg_types(fd, &cfg_cmsg_types); return main_loop_s(fd); } diff --git a/tools/testing/selftests/net/mptcp/mptcp_connect.sh b/tools/testing/selftests/net/mptcp/mptcp_connect.sh index 3c4cb72ed8a4..559173a8e387 100755 --- a/tools/testing/selftests/net/mptcp/mptcp_connect.sh +++ b/tools/testing/selftests/net/mptcp/mptcp_connect.sh @@ -3,7 +3,7 @@ time_start=$(date +%s) -optstring="S:R:d:e:l:r:h4cm:f:t" +optstring="S:R:d:e:l:r:h4cm:f:tC" ret=0 sin="" sout="" @@ -22,6 +22,7 @@ sndbuf=0 rcvbuf=0 options_log=true do_tcp=0 +checksum=false filesize=0 if [ $tc_loss -eq 100 ];then @@ -47,6 +48,7 @@ usage() { echo -e "\t-R: set rcvbuf value (default: use kernel default)" echo -e "\t-m: test mode (poll, sendfile; default: poll)" echo -e "\t-t: also run tests with TCP (use twice to non-fallback tcp)" + echo -e "\t-C: enable the MPTCP data checksum" } while getopts "$optstring" option;do @@ -104,6 +106,9 @@ while getopts "$optstring" option;do "t") do_tcp=$((do_tcp+1)) ;; + "C") + checksum=true + ;; "?") usage $0 exit 1 @@ -197,8 +202,11 @@ ip -net "$ns4" link set ns4eth3 up ip -net "$ns4" route add default via 10.0.3.2 ip -net "$ns4" route add default via dead:beef:3::2 -# use TCP syn cookies, even if no flooding was detected. -ip netns exec "$ns2" sysctl -q net.ipv4.tcp_syncookies=2 +if $checksum; then + for i in "$ns1" "$ns2" "$ns3" "$ns4";do + ip netns exec $i sysctl -q net.mptcp.checksum_enabled=1 + done +fi set_ethtool_flags() { local ns="$1" @@ -501,6 +509,7 @@ do_transfer() local stat_ackrx_now_l=$(get_mib_counter "${listener_ns}" "MPTcpExtMPCapableACKRX") local stat_cookietx_now=$(get_mib_counter "${listener_ns}" "TcpExtSyncookiesSent") local stat_cookierx_now=$(get_mib_counter "${listener_ns}" "TcpExtSyncookiesRecv") + local stat_ooo_now=$(get_mib_counter "${listener_ns}" "TcpExtTCPOFOQueue") expect_synrx=$((stat_synrx_last_l)) expect_ackrx=$((stat_ackrx_last_l)) @@ -518,10 +527,14 @@ do_transfer() "${stat_synrx_now_l}" "${expect_synrx}" 1>&2 retc=1 fi - if [ ${stat_ackrx_now_l} -lt ${expect_ackrx} ]; then - printf "[ FAIL ] lower MPC ACK rx (%d) than expected (%d)\n" \ - "${stat_ackrx_now_l}" "${expect_ackrx}" 1>&2 - rets=1 + if [ ${stat_ackrx_now_l} -lt ${expect_ackrx} -a ${stat_ooo_now} -eq 0 ]; then + if [ ${stat_ooo_now} -eq 0 ]; then + printf "[ FAIL ] lower MPC ACK rx (%d) than expected (%d)\n" \ + "${stat_ackrx_now_l}" "${expect_ackrx}" 1>&2 + rets=1 + else + printf "[ Note ] fallback due to TCP OoO" + fi fi if [ $retc -eq 0 ] && [ $rets -eq 0 ]; then @@ -667,6 +680,25 @@ run_tests_peekmode() run_tests_lo "$ns1" "$ns1" dead:beef:1::1 1 "-P ${peekmode}" } +display_time() +{ + time_end=$(date +%s) + time_run=$((time_end-time_start)) + + echo "Time: ${time_run} seconds" +} + +stop_if_error() +{ + local msg="$1" + + if [ ${ret} -ne 0 ]; then + echo "FAIL: ${msg}" 1>&2 + display_time + exit ${ret} + fi +} + make_file "$cin" "client" make_file "$sin" "server" @@ -674,6 +706,8 @@ check_mptcp_disabled check_mptcp_ulp_setsockopt +stop_if_error "The kernel configuration is not valid for MPTCP" + echo "INFO: validating network environment with pings" for sender in "$ns1" "$ns2" "$ns3" "$ns4";do do_ping "$ns1" $sender 10.0.1.1 @@ -693,6 +727,8 @@ for sender in "$ns1" "$ns2" "$ns3" "$ns4";do do_ping "$ns4" $sender dead:beef:3::1 done +stop_if_error "Could not even run ping tests" + [ -n "$tc_loss" ] && tc -net "$ns2" qdisc add dev ns2eth3 root netem loss random $tc_loss delay ${tc_delay}ms echo -n "INFO: Using loss of $tc_loss " test "$tc_delay" -gt 0 && echo -n "delay $tc_delay ms " @@ -720,18 +756,24 @@ echo "on ns3eth4" tc -net "$ns3" qdisc add dev ns3eth4 root netem delay ${reorder_delay}ms $tc_reorder +run_tests_lo "$ns1" "$ns1" 10.0.1.1 1 +stop_if_error "Could not even run loopback test" + +run_tests_lo "$ns1" "$ns1" dead:beef:1::1 1 +stop_if_error "Could not even run loopback v6 test" + for sender in $ns1 $ns2 $ns3 $ns4;do - run_tests_lo "$ns1" "$sender" 10.0.1.1 1 - if [ $ret -ne 0 ] ;then - echo "FAIL: Could not even run loopback test" 1>&2 - exit $ret - fi - run_tests_lo "$ns1" $sender dead:beef:1::1 1 - if [ $ret -ne 0 ] ;then - echo "FAIL: Could not even run loopback v6 test" 2>&1 - exit $ret + # ns1<->ns2 is not subject to reordering/tc delays. Use it to test + # mptcp syncookie support. + if [ $sender = $ns1 ]; then + ip netns exec "$ns2" sysctl -q net.ipv4.tcp_syncookies=2 + else + ip netns exec "$ns2" sysctl -q net.ipv4.tcp_syncookies=1 fi + run_tests "$ns1" $sender 10.0.1.1 + run_tests "$ns1" $sender dead:beef:1::1 + run_tests "$ns2" $sender 10.0.1.2 run_tests "$ns2" $sender dead:beef:1::2 run_tests "$ns2" $sender 10.0.2.1 @@ -744,14 +786,13 @@ for sender in $ns1 $ns2 $ns3 $ns4;do run_tests "$ns4" $sender 10.0.3.1 run_tests "$ns4" $sender dead:beef:3::1 + + stop_if_error "Tests with $sender as a sender have failed" done run_tests_peekmode "saveWithPeek" run_tests_peekmode "saveAfterPeek" +stop_if_error "Tests with peek mode have failed" -time_end=$(date +%s) -time_run=$((time_end-time_start)) - -echo "Time: ${time_run} seconds" - +display_time exit $ret diff --git a/tools/testing/selftests/net/mptcp/mptcp_join.sh b/tools/testing/selftests/net/mptcp/mptcp_join.sh index fd99485cf2a4..9a191c1a5de8 100755 --- a/tools/testing/selftests/net/mptcp/mptcp_join.sh +++ b/tools/testing/selftests/net/mptcp/mptcp_join.sh @@ -12,6 +12,7 @@ timeout_poll=30 timeout_test=$((timeout_poll * 2 + 1)) mptcp_connect="" capture=0 +checksum=0 do_all_tests=1 TEST_COUNT=0 @@ -49,6 +50,9 @@ init() ip netns exec $netns sysctl -q net.mptcp.enabled=1 ip netns exec $netns sysctl -q net.ipv4.conf.all.rp_filter=0 ip netns exec $netns sysctl -q net.ipv4.conf.default.rp_filter=0 + if [ $checksum -eq 1 ]; then + ip netns exec $netns sysctl -q net.mptcp.checksum_enabled=1 + fi done # ns1 ns2 @@ -124,6 +128,28 @@ reset_with_add_addr_timeout() -j DROP } +reset_with_checksum() +{ + local ns1_enable=$1 + local ns2_enable=$2 + + reset + + ip netns exec $ns1 sysctl -q net.mptcp.checksum_enabled=$ns1_enable + ip netns exec $ns2 sysctl -q net.mptcp.checksum_enabled=$ns2_enable +} + +reset_with_allow_join_id0() +{ + local ns1_enable=$1 + local ns2_enable=$2 + + reset + + ip netns exec $ns1 sysctl -q net.mptcp.allow_join_initial_addr_port=$ns1_enable + ip netns exec $ns2 sysctl -q net.mptcp.allow_join_initial_addr_port=$ns2_enable +} + ip -Version > /dev/null 2>&1 if [ $? -ne 0 ];then echo "SKIP: Could not run test without ip tool" @@ -476,6 +502,45 @@ run_tests() fi } +chk_csum_nr() +{ + local msg=${1:-""} + local count + local dump_stats + + if [ ! -z "$msg" ]; then + printf "%02u" "$TEST_COUNT" + else + echo -n " " + fi + printf " %-36s %s" "$msg" "sum" + count=`ip netns exec $ns1 nstat -as | grep MPTcpExtDataCsumErr | awk '{print $2}'` + [ -z "$count" ] && count=0 + if [ "$count" != 0 ]; then + echo "[fail] got $count data checksum error[s] expected 0" + ret=1 + dump_stats=1 + else + echo -n "[ ok ]" + fi + echo -n " - csum " + count=`ip netns exec $ns2 nstat -as | grep MPTcpExtDataCsumErr | awk '{print $2}'` + [ -z "$count" ] && count=0 + if [ "$count" != 0 ]; then + echo "[fail] got $count data checksum error[s] expected 0" + ret=1 + dump_stats=1 + else + echo "[ ok ]" + fi + if [ "${dump_stats}" = 1 ]; then + echo Server ns stats + ip netns exec $ns1 nstat -as | grep MPTcp + echo Client ns stats + ip netns exec $ns2 nstat -as | grep MPTcp + fi +} + chk_join_nr() { local msg="$1" @@ -523,6 +588,9 @@ chk_join_nr() echo Client ns stats ip netns exec $ns2 nstat -as | grep MPTcp fi + if [ $checksum -eq 1 ]; then + chk_csum_nr + fi } chk_add_nr() @@ -1374,6 +1442,94 @@ syncookies_tests() chk_add_nr 1 1 } +checksum_tests() +{ + # checksum test 0 0 + reset_with_checksum 0 0 + ip netns exec $ns1 ./pm_nl_ctl limits 0 1 + ip netns exec $ns2 ./pm_nl_ctl limits 0 1 + run_tests $ns1 $ns2 10.0.1.1 + chk_csum_nr "checksum test 0 0" + + # checksum test 1 1 + reset_with_checksum 1 1 + ip netns exec $ns1 ./pm_nl_ctl limits 0 1 + ip netns exec $ns2 ./pm_nl_ctl limits 0 1 + run_tests $ns1 $ns2 10.0.1.1 + chk_csum_nr "checksum test 1 1" + + # checksum test 0 1 + reset_with_checksum 0 1 + ip netns exec $ns1 ./pm_nl_ctl limits 0 1 + ip netns exec $ns2 ./pm_nl_ctl limits 0 1 + run_tests $ns1 $ns2 10.0.1.1 + chk_csum_nr "checksum test 0 1" + + # checksum test 1 0 + reset_with_checksum 1 0 + ip netns exec $ns1 ./pm_nl_ctl limits 0 1 + ip netns exec $ns2 ./pm_nl_ctl limits 0 1 + run_tests $ns1 $ns2 10.0.1.1 + chk_csum_nr "checksum test 1 0" +} + +deny_join_id0_tests() +{ + # subflow allow join id0 ns1 + reset_with_allow_join_id0 1 0 + ip netns exec $ns1 ./pm_nl_ctl limits 1 1 + ip netns exec $ns2 ./pm_nl_ctl limits 1 1 + ip netns exec $ns2 ./pm_nl_ctl add 10.0.3.2 flags subflow + run_tests $ns1 $ns2 10.0.1.1 + chk_join_nr "single subflow allow join id0 ns1" 1 1 1 + + # subflow allow join id0 ns2 + reset_with_allow_join_id0 0 1 + ip netns exec $ns1 ./pm_nl_ctl limits 1 1 + ip netns exec $ns2 ./pm_nl_ctl limits 1 1 + ip netns exec $ns2 ./pm_nl_ctl add 10.0.3.2 flags subflow + run_tests $ns1 $ns2 10.0.1.1 + chk_join_nr "single subflow allow join id0 ns2" 0 0 0 + + # signal address allow join id0 ns1 + # ADD_ADDRs are not affected by allow_join_id0 value. + reset_with_allow_join_id0 1 0 + ip netns exec $ns1 ./pm_nl_ctl limits 1 1 + ip netns exec $ns2 ./pm_nl_ctl limits 1 1 + ip netns exec $ns1 ./pm_nl_ctl add 10.0.2.1 flags signal + run_tests $ns1 $ns2 10.0.1.1 + chk_join_nr "signal address allow join id0 ns1" 1 1 1 + chk_add_nr 1 1 + + # signal address allow join id0 ns2 + # ADD_ADDRs are not affected by allow_join_id0 value. + reset_with_allow_join_id0 0 1 + ip netns exec $ns1 ./pm_nl_ctl limits 1 1 + ip netns exec $ns2 ./pm_nl_ctl limits 1 1 + ip netns exec $ns1 ./pm_nl_ctl add 10.0.2.1 flags signal + run_tests $ns1 $ns2 10.0.1.1 + chk_join_nr "signal address allow join id0 ns2" 1 1 1 + chk_add_nr 1 1 + + # subflow and address allow join id0 ns1 + reset_with_allow_join_id0 1 0 + ip netns exec $ns1 ./pm_nl_ctl limits 2 2 + ip netns exec $ns2 ./pm_nl_ctl limits 2 2 + ip netns exec $ns1 ./pm_nl_ctl add 10.0.2.1 flags signal + ip netns exec $ns2 ./pm_nl_ctl add 10.0.3.2 flags subflow + run_tests $ns1 $ns2 10.0.1.1 + chk_join_nr "subflow and address allow join id0 1" 2 2 2 + + # subflow and address allow join id0 ns2 + reset_with_allow_join_id0 0 1 + ip netns exec $ns1 ./pm_nl_ctl limits 2 2 + ip netns exec $ns2 ./pm_nl_ctl limits 2 2 + ip netns exec $ns1 ./pm_nl_ctl add 10.0.2.1 flags signal + ip netns exec $ns2 ./pm_nl_ctl add 10.0.3.2 flags subflow + run_tests $ns1 $ns2 10.0.1.1 + chk_join_nr "subflow and address allow join id0 2" 1 1 1 +} + all_tests() { subflows_tests @@ -1387,6 +1543,8 @@ all_tests() backup_tests add_addr_ports_tests syncookies_tests + checksum_tests + deny_join_id0_tests } usage() @@ -1403,7 +1561,10 @@ usage() echo " -b backup_tests" echo " -p add_addr_ports_tests" echo " -k syncookies_tests" + echo " -S checksum_tests" + echo " -d deny_join_id0_tests" echo " -c capture pcap files" + echo " -C enable data checksum" echo " -h help" } @@ -1418,13 +1579,16 @@ make_file "$sin" "server" 1 trap cleanup EXIT for arg in "$@"; do - # check for "capture" arg before launching tests + # check for "capture/checksum" args before launching tests if [[ "${arg}" =~ ^"-"[0-9a-zA-Z]*"c"[0-9a-zA-Z]*$ ]]; then capture=1 fi + if [[ "${arg}" =~ ^"-"[0-9a-zA-Z]*"C"[0-9a-zA-Z]*$ ]]; then + checksum=1 + fi - # exception for the capture option, the rest means: a part of the tests - if [ "${arg}" != "-c" ]; then + # exception for the capture/checksum options, the rest means: a part of the tests + if [ "${arg}" != "-c" ] && [ "${arg}" != "-C" ]; then do_all_tests=0 fi done @@ -1434,7 +1598,7 @@ if [ $do_all_tests -eq 1 ]; then exit $ret fi -while getopts 'fsltra64bpkch' opt; do +while getopts 'fsltra64bpkdchCS' opt; do case $opt in f) subflows_tests @@ -1469,8 +1633,16 @@ while getopts 'fsltra64bpkch' opt; do k) syncookies_tests ;; + S) + checksum_tests + ;; + d) + deny_join_id0_tests + ;; c) ;; + C) + ;; h | *) usage ;; diff --git a/tools/testing/selftests/net/mptcp/mptcp_sockopt.sh b/tools/testing/selftests/net/mptcp/mptcp_sockopt.sh index 2fa13946ac04..1579e471a5e7 100755 --- a/tools/testing/selftests/net/mptcp/mptcp_sockopt.sh +++ b/tools/testing/selftests/net/mptcp/mptcp_sockopt.sh @@ -178,7 +178,7 @@ do_transfer() timeout ${timeout_test} \ ip netns exec ${listener_ns} \ - $mptcp_connect -t ${timeout_poll} -l -M 1 -p $port -s ${srv_proto} \ + $mptcp_connect -t ${timeout_poll} -l -M 1 -p $port -s ${srv_proto} -c TIMESTAMPNS \ ${local_addr} < "$sin" > "$sout" & spid=$! @@ -186,7 +186,7 @@ do_transfer() timeout ${timeout_test} \ ip netns exec ${connector_ns} \ - $mptcp_connect -t ${timeout_poll} -M 2 -p $port -s ${cl_proto} \ + $mptcp_connect -t ${timeout_poll} -M 2 -p $port -s ${cl_proto} -c TIMESTAMPNS \ $connect_addr < "$cin" > "$cout" & cpid=$! diff --git a/tools/testing/selftests/net/mptcp/simult_flows.sh b/tools/testing/selftests/net/mptcp/simult_flows.sh index 3aeef3bcb101..fd63ebfe9a2b 100755 --- a/tools/testing/selftests/net/mptcp/simult_flows.sh +++ b/tools/testing/selftests/net/mptcp/simult_flows.sh @@ -60,6 +60,8 @@ setup() for i in "$ns1" "$ns2" "$ns3";do ip netns add $i || exit $ksft_skip ip -net $i link set lo up + ip netns exec $i sysctl -q net.ipv4.conf.all.rp_filter=0 + ip netns exec $i sysctl -q net.ipv4.conf.default.rp_filter=0 done ip link add ns1eth1 netns "$ns1" type veth peer name ns2eth1 netns "$ns2" @@ -80,7 +82,6 @@ setup() ip netns exec "$ns1" ./pm_nl_ctl limits 1 1 ip netns exec "$ns1" ./pm_nl_ctl add 10.0.2.1 dev ns1eth2 flags subflow - ip netns exec "$ns1" sysctl -q net.ipv4.conf.all.rp_filter=0 ip -net "$ns2" addr add 10.0.1.2/24 dev ns2eth1 ip -net "$ns2" addr add dead:beef:1::2/64 dev ns2eth1 nodad diff --git a/tools/testing/selftests/net/so_netns_cookie.c b/tools/testing/selftests/net/so_netns_cookie.c new file mode 100644 index 000000000000..b39e87e967cd --- /dev/null +++ b/tools/testing/selftests/net/so_netns_cookie.c @@ -0,0 +1,61 @@ +// SPDX-License-Identifier: GPL-2.0 +#define _GNU_SOURCE +#include <sched.h> +#include <unistd.h> +#include <stdio.h> +#include <errno.h> +#include <string.h> +#include <stdlib.h> +#include <stdint.h> +#include <sys/types.h> +#include <sys/socket.h> + +#ifndef SO_NETNS_COOKIE +#define SO_NETNS_COOKIE 71 +#endif + +#define pr_err(fmt, ...) \ + ({ \ + fprintf(stderr, "%s:%d:" fmt ": %m\n", \ + __func__, __LINE__, ##__VA_ARGS__); \ + 1; \ + }) + +int main(int argc, char *argvp[]) +{ + uint64_t cookie1, cookie2; + socklen_t vallen; + int sock1, sock2; + + sock1 = socket(AF_INET, SOCK_STREAM, 0); + if (sock1 < 0) + return pr_err("Unable to create TCP socket"); + + vallen = sizeof(cookie1); + if (getsockopt(sock1, SOL_SOCKET, SO_NETNS_COOKIE, &cookie1, &vallen) != 0) + return pr_err("getsockopt(SOL_SOCKET, SO_NETNS_COOKIE)"); + + if (!cookie1) + return pr_err("SO_NETNS_COOKIE returned zero cookie"); + + if (unshare(CLONE_NEWNET)) + return pr_err("unshare"); + + sock2 = socket(AF_INET, SOCK_STREAM, 0); + if (sock2 < 0) + return pr_err("Unable to create TCP socket"); + + vallen = sizeof(cookie2); + if (getsockopt(sock2, SOL_SOCKET, SO_NETNS_COOKIE, &cookie2, &vallen) != 0) + return pr_err("getsockopt(SOL_SOCKET, SO_NETNS_COOKIE)"); + + if (!cookie2) + return pr_err("SO_NETNS_COOKIE returned zero cookie"); + + if (cookie1 == cookie2) + return pr_err("SO_NETNS_COOKIE returned identical cookies for distinct ns"); + + close(sock1); + close(sock2); + return 0; +} diff --git a/tools/testing/selftests/net/srv6_end_dt46_l3vpn_test.sh b/tools/testing/selftests/net/srv6_end_dt46_l3vpn_test.sh new file mode 100755 index 000000000000..75ada17ac061 --- /dev/null +++ b/tools/testing/selftests/net/srv6_end_dt46_l3vpn_test.sh @@ -0,0 +1,573 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 +# +# author: Andrea Mayer <andrea.mayer@uniroma2.it> +# author: Paolo Lungaroni <paolo.lungaroni@uniroma2.it> + +# This test is designed for evaluating the new SRv6 End.DT46 Behavior used for +# implementing IPv4/IPv6 L3 VPN use cases. +# +# The current SRv6 code in the Linux kernel only implements SRv6 End.DT4 and +# End.DT6 Behaviors which can be used respectively to support IPv4-in-IPv6 and +# IPv6-in-IPv6 VPNs. With End.DT4 and End.DT6 it is not possible to create a +# single SRv6 VPN tunnel to carry both IPv4 and IPv6 traffic. +# The SRv6 End.DT46 Behavior implementation is meant to support the +# decapsulation of IPv4 and IPv6 traffic coming from a single SRv6 tunnel. +# Therefore, the SRv6 End.DT46 Behavior in the Linux kernel greatly simplifies +# the setup and operations of SRv6 VPNs. +# +# Hereafter a network diagram is shown, where two different tenants (named 100 +# and 200) offer IPv4/IPv6 L3 VPN services allowing hosts to communicate with +# each other across an IPv6 network. +# +# Only hosts belonging to the same tenant (and to the same VPN) can communicate +# with each other. Instead, the communication among hosts of different tenants +# is forbidden. +# In other words, hosts hs-t100-1 and hs-t100-2 are connected through the +# IPv4/IPv6 L3 VPN of tenant 100 while hs-t200-3 and hs-t200-4 are connected +# using the IPv4/IPv6 L3 VPN of tenant 200. Cross connection between tenant 100 +# and tenant 200 is forbidden and thus, for example, hs-t100-1 cannot reach +# hs-t200-3 and vice versa. +# +# Routers rt-1 and rt-2 implement IPv4/IPv6 L3 VPN services leveraging the SRv6 +# architecture. The key components for such VPNs are: a) SRv6 Encap behavior, +# b) SRv6 End.DT46 Behavior and c) VRF. +# +# To explain how an IPv4/IPv6 L3 VPN based on SRv6 works, let us briefly +# consider an example where, within the same domain of tenant 100, the host +# hs-t100-1 pings the host hs-t100-2. +# +# First of all, L2 reachability of the host hs-t100-2 is taken into account by +# the router rt-1 which acts as a arp/ndp proxy. +# +# When the host hs-t100-1 sends an IPv6 or IPv4 packet destined to hs-t100-2, +# the router rt-1 receives the packet on the internal veth-t100 interface. Such +# interface is enslaved to the VRF vrf-100 whose associated table contains the +# SRv6 Encap route for encapsulating any IPv6 or IPv4 packet in a IPv6 plus the +# Segment Routing Header (SRH) packet. This packet is sent through the (IPv6) +# core network up to the router rt-2 that receives it on veth0 interface. +# +# The rt-2 router uses the 'localsid' routing table to process incoming +# IPv6+SRH packets which belong to the VPN of the tenant 100. For each of these +# packets, the SRv6 End.DT46 Behavior removes the outer IPv6+SRH headers and +# performs the lookup on the vrf-100 table using the destination address of +# the decapsulated IPv6 or IPv4 packet. Afterwards, the packet is sent to the +# host hs-t100-2 through the veth-t100 interface. +# +# The ping response follows the same processing but this time the roles of rt-1 +# and rt-2 are swapped. +# +# Of course, the IPv4/IPv6 L3 VPN for tenant 200 works exactly as the IPv4/IPv6 +# L3 VPN for tenant 100. In this case, only hosts hs-t200-3 and hs-t200-4 are +# able to connect with each other. +# +# +# +-------------------+ +-------------------+ +# | | | | +# | hs-t100-1 netns | | hs-t100-2 netns | +# | | | | +# | +-------------+ | | +-------------+ | +# | | veth0 | | | | veth0 | | +# | | cafe::1/64 | | | | cafe::2/64 | | +# | | 10.0.0.1/24 | | | | 10.0.0.2/24 | | +# | +-------------+ | | +-------------+ | +# | . | | . | +# +-------------------+ +-------------------+ +# . . +# . . +# . . +# +-----------------------------------+ +-----------------------------------+ +# | . | | . | +# | +---------------+ | | +---------------- | +# | | veth-t100 | | | | veth-t100 | | +# | | cafe::254/64 | | | | cafe::254/64 | | +# | | 10.0.0.254/24 | +----------+ | | +----------+ | 10.0.0.254/24 | | +# | +-------+-------+ | localsid | | | | localsid | +-------+-------- | +# | | | table | | | | table | | | +# | +----+----+ +----------+ | | +----------+ +----+----+ | +# | | vrf-100 | | | | vrf-100 | | +# | +---------+ +------------+ | | +------------+ +---------+ | +# | | veth0 | | | | veth0 | | +# | | fd00::1/64 |.|...|.| fd00::2/64 | | +# | +---------+ +------------+ | | +------------+ +---------+ | +# | | vrf-200 | | | | vrf-200 | | +# | +----+----+ | | +----+----+ | +# | | | | | | +# | +-------+-------+ | | +-------+-------- | +# | | veth-t200 | | | | veth-t200 | | +# | | cafe::254/64 | | | | cafe::254/64 | | +# | | 10.0.0.254/24 | | | | 10.0.0.254/24 | | +# | +---------------+ rt-1 netns | | rt-2 netns +---------------- | +# | . | | . | +# +-----------------------------------+ +-----------------------------------+ +# . . +# . . +# . . +# . . +# +-------------------+ +-------------------+ +# | . | | . | +# | +-------------+ | | +-------------+ | +# | | veth0 | | | | veth0 | | +# | | cafe::3/64 | | | | cafe::4/64 | | +# | | 10.0.0.3/24 | | | | 10.0.0.4/24 | | +# | +-------------+ | | +-------------+ | +# | | | | +# | hs-t200-3 netns | | hs-t200-4 netns | +# | | | | +# +-------------------+ +-------------------+ +# +# +# ~~~~~~~~~~~~~~~~~~~~~~~~~ +# | Network configuration | +# ~~~~~~~~~~~~~~~~~~~~~~~~~ +# +# rt-1: localsid table (table 90) +# +--------------------------------------------------+ +# |SID |Action | +# +--------------------------------------------------+ +# |fc00:21:100::6046|apply SRv6 End.DT46 vrftable 100| +# +--------------------------------------------------+ +# |fc00:21:200::6046|apply SRv6 End.DT46 vrftable 200| +# +--------------------------------------------------+ +# +# rt-1: VRF tenant 100 (table 100) +# +---------------------------------------------------+ +# |host |Action | +# +---------------------------------------------------+ +# |cafe::2 |apply seg6 encap segs fc00:12:100::6046| +# +---------------------------------------------------+ +# |cafe::/64 |forward to dev veth-t100 | +# +---------------------------------------------------+ +# |10.0.0.2 |apply seg6 encap segs fc00:12:100::6046| +# +---------------------------------------------------+ +# |10.0.0.0/24|forward to dev veth-t100 | +# +---------------------------------------------------+ +# +# rt-1: VRF tenant 200 (table 200) +# +---------------------------------------------------+ +# |host |Action | +# +---------------------------------------------------+ +# |cafe::4 |apply seg6 encap segs fc00:12:200::6046| +# +---------------------------------------------------+ +# |cafe::/64 |forward to dev veth-t200 | +# +---------------------------------------------------+ +# |10.0.0.4 |apply seg6 encap segs fc00:12:200::6046| +# +---------------------------------------------------+ +# |10.0.0.0/24|forward to dev veth-t200 | +# +---------------------------------------------------+ +# +# +# rt-2: localsid table (table 90) +# +--------------------------------------------------+ +# |SID |Action | +# +--------------------------------------------------+ +# |fc00:12:100::6046|apply SRv6 End.DT46 vrftable 100| +# +--------------------------------------------------+ +# |fc00:12:200::6046|apply SRv6 End.DT46 vrftable 200| +# +--------------------------------------------------+ +# +# rt-2: VRF tenant 100 (table 100) +# +---------------------------------------------------+ +# |host |Action | +# +---------------------------------------------------+ +# |cafe::1 |apply seg6 encap segs fc00:21:100::6046| +# +---------------------------------------------------+ +# |cafe::/64 |forward to dev veth-t100 | +# +---------------------------------------------------+ +# |10.0.0.1 |apply seg6 encap segs fc00:21:100::6046| +# +---------------------------------------------------+ +# |10.0.0.0/24|forward to dev veth-t100 | +# +---------------------------------------------------+ +# +# rt-2: VRF tenant 200 (table 200) +# +---------------------------------------------------+ +# |host |Action | +# +---------------------------------------------------+ +# |cafe::3 |apply seg6 encap segs fc00:21:200::6046| +# +---------------------------------------------------+ +# |cafe::/64 |forward to dev veth-t200 | +# +---------------------------------------------------+ +# |10.0.0.3 |apply seg6 encap segs fc00:21:200::6046| +# +---------------------------------------------------+ +# |10.0.0.0/24|forward to dev veth-t200 | +# +---------------------------------------------------+ +# + +readonly LOCALSID_TABLE_ID=90 +readonly IPv6_RT_NETWORK=fd00 +readonly IPv6_HS_NETWORK=cafe +readonly IPv4_HS_NETWORK=10.0.0 +readonly VPN_LOCATOR_SERVICE=fc00 +PING_TIMEOUT_SEC=4 + +ret=0 + +PAUSE_ON_FAIL=${PAUSE_ON_FAIL:=no} + +log_test() +{ + local rc=$1 + local expected=$2 + local msg="$3" + + if [ ${rc} -eq ${expected} ]; then + nsuccess=$((nsuccess+1)) + printf "\n TEST: %-60s [ OK ]\n" "${msg}" + else + ret=1 + nfail=$((nfail+1)) + printf "\n TEST: %-60s [FAIL]\n" "${msg}" + if [ "${PAUSE_ON_FAIL}" = "yes" ]; then + echo + echo "hit enter to continue, 'q' to quit" + read a + [ "$a" = "q" ] && exit 1 + fi + fi +} + +print_log_test_results() +{ + if [ "$TESTS" != "none" ]; then + printf "\nTests passed: %3d\n" ${nsuccess} + printf "Tests failed: %3d\n" ${nfail} + fi +} + +log_section() +{ + echo + echo "################################################################################" + echo "TEST SECTION: $*" + echo "################################################################################" +} + +cleanup() +{ + ip link del veth-rt-1 2>/dev/null || true + ip link del veth-rt-2 2>/dev/null || true + + # destroy routers rt-* and hosts hs-* + for ns in $(ip netns show | grep -E 'rt-*|hs-*'); do + ip netns del ${ns} || true + done +} + +# Setup the basic networking for the routers +setup_rt_networking() +{ + local rt=$1 + local nsname=rt-${rt} + + ip netns add ${nsname} + ip link set veth-rt-${rt} netns ${nsname} + ip -netns ${nsname} link set veth-rt-${rt} name veth0 + + ip netns exec ${nsname} sysctl -wq net.ipv6.conf.all.accept_dad=0 + ip netns exec ${nsname} sysctl -wq net.ipv6.conf.default.accept_dad=0 + + ip -netns ${nsname} addr add ${IPv6_RT_NETWORK}::${rt}/64 dev veth0 nodad + ip -netns ${nsname} link set veth0 up + ip -netns ${nsname} link set lo up + + ip netns exec ${nsname} sysctl -wq net.ipv4.ip_forward=1 + ip netns exec ${nsname} sysctl -wq net.ipv6.conf.all.forwarding=1 +} + +setup_hs() +{ + local hs=$1 + local rt=$2 + local tid=$3 + local hsname=hs-t${tid}-${hs} + local rtname=rt-${rt} + local rtveth=veth-t${tid} + + # set the networking for the host + ip netns add ${hsname} + + ip netns exec ${hsname} sysctl -wq net.ipv6.conf.all.accept_dad=0 + ip netns exec ${hsname} sysctl -wq net.ipv6.conf.default.accept_dad=0 + + ip -netns ${hsname} link add veth0 type veth peer name ${rtveth} + ip -netns ${hsname} link set ${rtveth} netns ${rtname} + ip -netns ${hsname} addr add ${IPv6_HS_NETWORK}::${hs}/64 dev veth0 nodad + ip -netns ${hsname} addr add ${IPv4_HS_NETWORK}.${hs}/24 dev veth0 + ip -netns ${hsname} link set veth0 up + ip -netns ${hsname} link set lo up + + # configure the VRF for the tenant X on the router which is directly + # connected to the source host. + ip -netns ${rtname} link add vrf-${tid} type vrf table ${tid} + ip -netns ${rtname} link set vrf-${tid} up + + ip netns exec ${rtname} sysctl -wq net.ipv6.conf.all.accept_dad=0 + ip netns exec ${rtname} sysctl -wq net.ipv6.conf.default.accept_dad=0 + + # enslave the veth-tX interface to the vrf-X in the access router + ip -netns ${rtname} link set ${rtveth} master vrf-${tid} + ip -netns ${rtname} addr add ${IPv6_HS_NETWORK}::254/64 dev ${rtveth} nodad + ip -netns ${rtname} addr add ${IPv4_HS_NETWORK}.254/24 dev ${rtveth} + ip -netns ${rtname} link set ${rtveth} up + + ip netns exec ${rtname} sysctl -wq net.ipv6.conf.${rtveth}.proxy_ndp=1 + ip netns exec ${rtname} sysctl -wq net.ipv4.conf.${rtveth}.proxy_arp=1 + + # disable the rp_filter otherwise the kernel gets confused about how + # to route decap ipv4 packets. + ip netns exec ${rtname} sysctl -wq net.ipv4.conf.all.rp_filter=0 + ip netns exec ${rtname} sysctl -wq net.ipv4.conf.${rtveth}.rp_filter=0 + + ip netns exec ${rtname} sh -c "echo 1 > /proc/sys/net/vrf/strict_mode" +} + +setup_vpn_config() +{ + local hssrc=$1 + local rtsrc=$2 + local hsdst=$3 + local rtdst=$4 + local tid=$5 + + local hssrc_name=hs-t${tid}-${hssrc} + local hsdst_name=hs-t${tid}-${hsdst} + local rtsrc_name=rt-${rtsrc} + local rtdst_name=rt-${rtdst} + local rtveth=veth-t${tid} + local vpn_sid=${VPN_LOCATOR_SERVICE}:${hssrc}${hsdst}:${tid}::6046 + + ip -netns ${rtsrc_name} -6 neigh add proxy ${IPv6_HS_NETWORK}::${hsdst} dev ${rtveth} + + # set the encap route for encapsulating packets which arrive from the + # host hssrc and destined to the access router rtsrc. + ip -netns ${rtsrc_name} -6 route add ${IPv6_HS_NETWORK}::${hsdst}/128 vrf vrf-${tid} \ + encap seg6 mode encap segs ${vpn_sid} dev veth0 + ip -netns ${rtsrc_name} -4 route add ${IPv4_HS_NETWORK}.${hsdst}/32 vrf vrf-${tid} \ + encap seg6 mode encap segs ${vpn_sid} dev veth0 + ip -netns ${rtsrc_name} -6 route add ${vpn_sid}/128 vrf vrf-${tid} \ + via fd00::${rtdst} dev veth0 + + # set the decap route for decapsulating packets which arrive from + # the rtdst router and destined to the hsdst host. + ip -netns ${rtdst_name} -6 route add ${vpn_sid}/128 table ${LOCALSID_TABLE_ID} \ + encap seg6local action End.DT46 vrftable ${tid} dev vrf-${tid} + + # all sids for VPNs start with a common locator which is fc00::/16. + # Routes for handling the SRv6 End.DT46 behavior instances are grouped + # together in the 'localsid' table. + # + # NOTE: added only once + if [ -z "$(ip -netns ${rtdst_name} -6 rule show | \ + grep "to ${VPN_LOCATOR_SERVICE}::/16 lookup ${LOCALSID_TABLE_ID}")" ]; then + ip -netns ${rtdst_name} -6 rule add \ + to ${VPN_LOCATOR_SERVICE}::/16 \ + lookup ${LOCALSID_TABLE_ID} prio 999 + fi + + # set default routes to unreachable for both ipv4 and ipv6 + ip -netns ${rtsrc_name} -6 route add unreachable default metric 4278198272 \ + vrf vrf-${tid} + + ip -netns ${rtsrc_name} -4 route add unreachable default metric 4278198272 \ + vrf vrf-${tid} +} + +setup() +{ + ip link add veth-rt-1 type veth peer name veth-rt-2 + # setup the networking for router rt-1 and router rt-2 + setup_rt_networking 1 + setup_rt_networking 2 + + # setup two hosts for the tenant 100. + # - host hs-1 is directly connected to the router rt-1; + # - host hs-2 is directly connected to the router rt-2. + setup_hs 1 1 100 #args: host router tenant + setup_hs 2 2 100 + + # setup two hosts for the tenant 200 + # - host hs-3 is directly connected to the router rt-1; + # - host hs-4 is directly connected to the router rt-2. + setup_hs 3 1 200 + setup_hs 4 2 200 + + # setup the IPv4/IPv6 L3 VPN which connects the host hs-t100-1 and host + # hs-t100-2 within the same tenant 100. + setup_vpn_config 1 1 2 2 100 #args: src_host src_router dst_host dst_router tenant + setup_vpn_config 2 2 1 1 100 + + # setup the IPv4/IPv6 L3 VPN which connects the host hs-t200-3 and host + # hs-t200-4 within the same tenant 200. + setup_vpn_config 3 1 4 2 200 + setup_vpn_config 4 2 3 1 200 +} + +check_rt_connectivity() +{ + local rtsrc=$1 + local rtdst=$2 + + ip netns exec rt-${rtsrc} ping -c 1 -W 1 ${IPv6_RT_NETWORK}::${rtdst} \ + >/dev/null 2>&1 +} + +check_and_log_rt_connectivity() +{ + local rtsrc=$1 + local rtdst=$2 + + check_rt_connectivity ${rtsrc} ${rtdst} + log_test $? 0 "Routers connectivity: rt-${rtsrc} -> rt-${rtdst}" +} + +check_hs_ipv6_connectivity() +{ + local hssrc=$1 + local hsdst=$2 + local tid=$3 + + ip netns exec hs-t${tid}-${hssrc} ping -c 1 -W ${PING_TIMEOUT_SEC} \ + ${IPv6_HS_NETWORK}::${hsdst} >/dev/null 2>&1 +} + +check_hs_ipv4_connectivity() +{ + local hssrc=$1 + local hsdst=$2 + local tid=$3 + + ip netns exec hs-t${tid}-${hssrc} ping -c 1 -W ${PING_TIMEOUT_SEC} \ + ${IPv4_HS_NETWORK}.${hsdst} >/dev/null 2>&1 +} + +check_and_log_hs_connectivity() +{ + local hssrc=$1 + local hsdst=$2 + local tid=$3 + + check_hs_ipv6_connectivity ${hssrc} ${hsdst} ${tid} + log_test $? 0 "IPv6 Hosts connectivity: hs-t${tid}-${hssrc} -> hs-t${tid}-${hsdst} (tenant ${tid})" + + check_hs_ipv4_connectivity ${hssrc} ${hsdst} ${tid} + log_test $? 0 "IPv4 Hosts connectivity: hs-t${tid}-${hssrc} -> hs-t${tid}-${hsdst} (tenant ${tid})" + +} + +check_and_log_hs_isolation() +{ + local hssrc=$1 + local tidsrc=$2 + local hsdst=$3 + local tiddst=$4 + + check_hs_ipv6_connectivity ${hssrc} ${hsdst} ${tidsrc} + # NOTE: ping should fail + log_test $? 1 "IPv6 Hosts isolation: hs-t${tidsrc}-${hssrc} -X-> hs-t${tiddst}-${hsdst}" + + check_hs_ipv4_connectivity ${hssrc} ${hsdst} ${tidsrc} + # NOTE: ping should fail + log_test $? 1 "IPv4 Hosts isolation: hs-t${tidsrc}-${hssrc} -X-> hs-t${tiddst}-${hsdst}" + +} + + +check_and_log_hs2gw_connectivity() +{ + local hssrc=$1 + local tid=$2 + + check_hs_ipv6_connectivity ${hssrc} 254 ${tid} + log_test $? 0 "IPv6 Hosts connectivity: hs-t${tid}-${hssrc} -> gw (tenant ${tid})" + + check_hs_ipv4_connectivity ${hssrc} 254 ${tid} + log_test $? 0 "IPv4 Hosts connectivity: hs-t${tid}-${hssrc} -> gw (tenant ${tid})" + +} + +router_tests() +{ + log_section "IPv6 routers connectivity test" + + check_and_log_rt_connectivity 1 2 + check_and_log_rt_connectivity 2 1 +} + +host2gateway_tests() +{ + log_section "IPv4/IPv6 connectivity test among hosts and gateway" + + check_and_log_hs2gw_connectivity 1 100 + check_and_log_hs2gw_connectivity 2 100 + + check_and_log_hs2gw_connectivity 3 200 + check_and_log_hs2gw_connectivity 4 200 +} + +host_vpn_tests() +{ + log_section "SRv6 VPN connectivity test among hosts in the same tenant" + + check_and_log_hs_connectivity 1 2 100 + check_and_log_hs_connectivity 2 1 100 + + check_and_log_hs_connectivity 3 4 200 + check_and_log_hs_connectivity 4 3 200 +} + +host_vpn_isolation_tests() +{ + local i + local j + local k + local tmp + local l1="1 2" + local l2="3 4" + local t1=100 + local t2=200 + + log_section "SRv6 VPN isolation test among hosts in different tentants" + + for k in 0 1; do + for i in ${l1}; do + for j in ${l2}; do + check_and_log_hs_isolation ${i} ${t1} ${j} ${t2} + done + done + + # let us test the reverse path + tmp="${l1}"; l1="${l2}"; l2="${tmp}" + tmp=${t1}; t1=${t2}; t2=${tmp} + done +} + +if [ "$(id -u)" -ne 0 ];then + echo "SKIP: Need root privileges" + exit 0 +fi + +if [ ! -x "$(command -v ip)" ]; then + echo "SKIP: Could not run test without ip tool" + exit 0 +fi + +modprobe vrf &>/dev/null +if [ ! -e /proc/sys/net/vrf/strict_mode ]; then + echo "SKIP: vrf sysctl does not exist" + exit 0 +fi + +cleanup &>/dev/null + +setup + +router_tests +host2gateway_tests +host_vpn_tests +host_vpn_isolation_tests + +print_log_test_results + +cleanup &>/dev/null + +exit ${ret} diff --git a/tools/testing/selftests/net/tls.c b/tools/testing/selftests/net/tls.c index 426d07875a48..112d41d01b12 100644 --- a/tools/testing/selftests/net/tls.c +++ b/tools/testing/selftests/net/tls.c @@ -25,6 +25,47 @@ #define TLS_PAYLOAD_MAX_LEN 16384 #define SOL_TLS 282 +struct tls_crypto_info_keys { + union { + struct tls12_crypto_info_aes_gcm_128 aes128; + struct tls12_crypto_info_chacha20_poly1305 chacha20; + }; + size_t len; +}; + +static void tls_crypto_info_init(uint16_t tls_version, uint16_t cipher_type, + struct tls_crypto_info_keys *tls12) +{ + memset(tls12, 0, sizeof(*tls12)); + + switch (cipher_type) { + case TLS_CIPHER_CHACHA20_POLY1305: + tls12->len = sizeof(struct tls12_crypto_info_chacha20_poly1305); + tls12->chacha20.info.version = tls_version; + tls12->chacha20.info.cipher_type = cipher_type; + break; + case TLS_CIPHER_AES_GCM_128: + tls12->len = sizeof(struct tls12_crypto_info_aes_gcm_128); + tls12->aes128.info.version = tls_version; + tls12->aes128.info.cipher_type = cipher_type; + break; + default: + break; + } +} + +static void memrnd(void *s, size_t n) +{ + int *dword = s; + char *byte; + + for (; n >= 4; n -= 4) + *dword++ = rand(); + byte = (void *)dword; + while (n--) + *byte++ = rand(); +} + FIXTURE(tls_basic) { int fd, cfd; @@ -133,33 +174,16 @@ FIXTURE_VARIANT_ADD(tls, 13_chacha) FIXTURE_SETUP(tls) { - union { - struct tls12_crypto_info_aes_gcm_128 aes128; - struct tls12_crypto_info_chacha20_poly1305 chacha20; - } tls12; + struct tls_crypto_info_keys tls12; struct sockaddr_in addr; socklen_t len; int sfd, ret; - size_t tls12_sz; self->notls = false; len = sizeof(addr); - memset(&tls12, 0, sizeof(tls12)); - switch (variant->cipher_type) { - case TLS_CIPHER_CHACHA20_POLY1305: - tls12_sz = sizeof(struct tls12_crypto_info_chacha20_poly1305); - tls12.chacha20.info.version = variant->tls_version; - tls12.chacha20.info.cipher_type = variant->cipher_type; - break; - case TLS_CIPHER_AES_GCM_128: - tls12_sz = sizeof(struct tls12_crypto_info_aes_gcm_128); - tls12.aes128.info.version = variant->tls_version; - tls12.aes128.info.cipher_type = variant->cipher_type; - break; - default: - tls12_sz = 0; - } + tls_crypto_info_init(variant->tls_version, variant->cipher_type, + &tls12); addr.sin_family = AF_INET; addr.sin_addr.s_addr = htonl(INADDR_ANY); @@ -187,7 +211,7 @@ FIXTURE_SETUP(tls) if (!self->notls) { ret = setsockopt(self->fd, SOL_TLS, TLS_TX, &tls12, - tls12_sz); + tls12.len); ASSERT_EQ(ret, 0); } @@ -200,7 +224,7 @@ FIXTURE_SETUP(tls) ASSERT_EQ(ret, 0); ret = setsockopt(self->cfd, SOL_TLS, TLS_RX, &tls12, - tls12_sz); + tls12.len); ASSERT_EQ(ret, 0); } @@ -308,6 +332,8 @@ TEST_F(tls, recv_max) char recv_mem[TLS_PAYLOAD_MAX_LEN]; char buf[TLS_PAYLOAD_MAX_LEN]; + memrnd(buf, sizeof(buf)); + EXPECT_GE(send(self->fd, buf, send_len, 0), 0); EXPECT_NE(recv(self->cfd, recv_mem, send_len, 0), -1); EXPECT_EQ(memcmp(buf, recv_mem, send_len), 0); @@ -588,6 +614,8 @@ TEST_F(tls, recvmsg_single_max) struct iovec vec; struct msghdr hdr; + memrnd(send_mem, sizeof(send_mem)); + EXPECT_EQ(send(self->fd, send_mem, send_len, 0), send_len); vec.iov_base = (char *)recv_mem; vec.iov_len = TLS_PAYLOAD_MAX_LEN; @@ -610,6 +638,8 @@ TEST_F(tls, recvmsg_multiple) struct msghdr hdr; int i; + memrnd(buf, sizeof(buf)); + EXPECT_EQ(send(self->fd, buf, send_len, 0), send_len); for (i = 0; i < msg_iovlen; i++) { iov_base[i] = (char *)malloc(iov_len); @@ -634,6 +664,8 @@ TEST_F(tls, single_send_multiple_recv) char send_mem[TLS_PAYLOAD_MAX_LEN * 2]; char recv_mem[TLS_PAYLOAD_MAX_LEN * 2]; + memrnd(send_mem, sizeof(send_mem)); + EXPECT_GE(send(self->fd, send_mem, total_len, 0), 0); memset(recv_mem, 0, total_len); @@ -834,18 +866,17 @@ TEST_F(tls, bidir) int ret; if (!self->notls) { - struct tls12_crypto_info_aes_gcm_128 tls12; + struct tls_crypto_info_keys tls12; - memset(&tls12, 0, sizeof(tls12)); - tls12.info.version = variant->tls_version; - tls12.info.cipher_type = TLS_CIPHER_AES_GCM_128; + tls_crypto_info_init(variant->tls_version, variant->cipher_type, + &tls12); ret = setsockopt(self->fd, SOL_TLS, TLS_RX, &tls12, - sizeof(tls12)); + tls12.len); ASSERT_EQ(ret, 0); ret = setsockopt(self->cfd, SOL_TLS, TLS_TX, &tls12, - sizeof(tls12)); + tls12.len); ASSERT_EQ(ret, 0); } diff --git a/tools/testing/selftests/net/udpgro_fwd.sh b/tools/testing/selftests/net/udpgro_fwd.sh index a8fa64136282..7f26591f236b 100755 --- a/tools/testing/selftests/net/udpgro_fwd.sh +++ b/tools/testing/selftests/net/udpgro_fwd.sh @@ -1,4 +1,4 @@ -#!/bin/sh +#!/bin/bash # SPDX-License-Identifier: GPL-2.0 readonly BASE="ns-$(mktemp -u XXXXXX)" diff --git a/tools/testing/selftests/net/unicast_extensions.sh b/tools/testing/selftests/net/unicast_extensions.sh index dbf0421986df..66354cdd5ce4 100755 --- a/tools/testing/selftests/net/unicast_extensions.sh +++ b/tools/testing/selftests/net/unicast_extensions.sh @@ -189,6 +189,15 @@ segmenttest 255.255.255.1 255.255.255.254 24 "assign and ping inside 255.255.255 route_test 240.5.6.7 240.5.6.1 255.1.2.1 255.1.2.3 24 "route between 240.5.6/24 and 255.1.2/24 (is allowed)" route_test 0.200.6.7 0.200.38.1 245.99.101.1 245.99.200.111 16 "route between 0.200/16 and 245.99/16 (is allowed)" # +# Test support for lowest address ending in .0 +segmenttest 5.10.15.20 5.10.15.0 24 "assign and ping lowest address (/24)" +# +# Test support for lowest address not ending in .0 +segmenttest 192.168.101.192 192.168.101.193 26 "assign and ping lowest address (/26)" +# +# Routing using lowest address as a gateway/endpoint +route_test 192.168.42.1 192.168.42.0 9.8.7.6 9.8.7.0 24 "routing using lowest address" +# # ============================================== # ==== TESTS THAT CURRENTLY EXPECT FAILURE ===== # ============================================== @@ -202,14 +211,6 @@ segmenttest 255.255.255.1 255.255.255.255 16 "assigning 255.255.255.255 (is forb # Currently Linux does not allow this, so this should fail too segmenttest 127.99.4.5 127.99.4.6 16 "assign and ping inside 127/8 (is forbidden)" # -# Test support for lowest address -# Currently Linux does not allow this, so this should fail too -segmenttest 5.10.15.20 5.10.15.0 24 "assign and ping lowest address (is forbidden)" -# -# Routing using lowest address as a gateway/endpoint -# Currently Linux does not allow this, so this should fail too -route_test 192.168.42.1 192.168.42.0 9.8.7.6 9.8.7.0 24 "routing using lowest address (is forbidden)" -# # Test support for unicast use of class D # Currently Linux does not allow this, so this should fail too segmenttest 225.1.2.3 225.1.2.200 24 "assign and ping class D address (is forbidden)" diff --git a/tools/testing/selftests/net/veth.sh b/tools/testing/selftests/net/veth.sh index 2fedc0781ce8..11d7cdb898c0 100755 --- a/tools/testing/selftests/net/veth.sh +++ b/tools/testing/selftests/net/veth.sh @@ -18,7 +18,8 @@ ret=0 cleanup() { local ns - local -r jobs="$(jobs -p)" + local jobs + readonly jobs="$(jobs -p)" [ -n "${jobs}" ] && kill -1 ${jobs} 2>/dev/null rm -f $STATS @@ -108,7 +109,7 @@ chk_gro() { if [ ! -f ../bpf/xdp_dummy.o ]; then echo "Missing xdp_dummy helper. Build bpf selftest first" - exit -1 + exit 1 fi create_ns diff --git a/tools/testing/selftests/netfilter/Makefile b/tools/testing/selftests/netfilter/Makefile index 3171069a6b46..cd6430b39982 100644 --- a/tools/testing/selftests/netfilter/Makefile +++ b/tools/testing/selftests/netfilter/Makefile @@ -1,7 +1,7 @@ # SPDX-License-Identifier: GPL-2.0 # Makefile for netfilter selftests -TEST_PROGS := nft_trans_stress.sh nft_nat.sh bridge_brouter.sh \ +TEST_PROGS := nft_trans_stress.sh nft_fib.sh nft_nat.sh bridge_brouter.sh \ conntrack_icmp_related.sh nft_flowtable.sh ipvs.sh \ nft_concat_range.sh nft_conntrack_helper.sh \ nft_queue.sh nft_meta.sh nf_nat_edemux.sh \ diff --git a/tools/testing/selftests/netfilter/nft_fib.sh b/tools/testing/selftests/netfilter/nft_fib.sh new file mode 100755 index 000000000000..6caf6ac8c285 --- /dev/null +++ b/tools/testing/selftests/netfilter/nft_fib.sh @@ -0,0 +1,221 @@ +#!/bin/bash +# +# This tests the fib expression. +# +# Kselftest framework requirement - SKIP code is 4. +ksft_skip=4 +ret=0 + +sfx=$(mktemp -u "XXXXXXXX") +ns1="ns1-$sfx" +ns2="ns2-$sfx" +nsrouter="nsrouter-$sfx" +timeout=4 + +log_netns=$(sysctl -n net.netfilter.nf_log_all_netns) + +cleanup() +{ + ip netns del ${ns1} + ip netns del ${ns2} + ip netns del ${nsrouter} + + [ $log_netns -eq 0 ] && sysctl -q net.netfilter.nf_log_all_netns=$log_netns +} + +nft --version > /dev/null 2>&1 +if [ $? -ne 0 ];then + echo "SKIP: Could not run test without nft tool" + exit $ksft_skip +fi + +ip -Version > /dev/null 2>&1 +if [ $? -ne 0 ];then + echo "SKIP: Could not run test without ip tool" + exit $ksft_skip +fi + +ip netns add ${nsrouter} +if [ $? -ne 0 ];then + echo "SKIP: Could not create net namespace" + exit $ksft_skip +fi + +trap cleanup EXIT + +dmesg | grep -q ' nft_rpfilter: ' +if [ $? -eq 0 ]; then + dmesg -c | grep ' nft_rpfilter: ' + echo "WARN: a previous test run has failed" 1>&2 +fi + +sysctl -q net.netfilter.nf_log_all_netns=1 +ip netns add ${ns1} +ip netns add ${ns2} + +load_ruleset() { + local netns=$1 + +ip netns exec ${netns} nft -f /dev/stdin <<EOF +table inet filter { + chain prerouting { + type filter hook prerouting priority 0; policy accept; + fib saddr . iif oif missing counter log prefix "$netns nft_rpfilter: " drop + } +} +EOF +} + +load_ruleset_count() { + local netns=$1 + +ip netns exec ${netns} nft -f /dev/stdin <<EOF +table inet filter { + chain prerouting { + type filter hook prerouting priority 0; policy accept; + ip daddr 1.1.1.1 fib saddr . iif oif missing counter drop + ip6 daddr 1c3::c01d fib saddr . iif oif missing counter drop + } +} +EOF +} + +check_drops() { + dmesg | grep -q ' nft_rpfilter: ' + if [ $? -eq 0 ]; then + dmesg | grep ' nft_rpfilter: ' + echo "FAIL: rpfilter did drop packets" + return 1 + fi + + return 0 +} + +check_fib_counter() { + local want=$1 + local ns=$2 + local address=$3 + + line=$(ip netns exec ${ns} nft list table inet filter | grep 'fib saddr . iif' | grep $address | grep "packets $want" ) + ret=$? + + if [ $ret -ne 0 ];then + echo "Netns $ns fib counter doesn't match expected packet count of $want for $address" 1>&2 + ip netns exec ${ns} nft list table inet filter + return 1 + fi + + if [ $want -gt 0 ]; then + echo "PASS: fib expression did drop packets for $address" + fi + + return 0 +} + +load_ruleset ${nsrouter} +load_ruleset ${ns1} +load_ruleset ${ns2} + +ip link add veth0 netns ${nsrouter} type veth peer name eth0 netns ${ns1} > /dev/null 2>&1 +if [ $? -ne 0 ];then + echo "SKIP: No virtual ethernet pair device support in kernel" + exit $ksft_skip +fi +ip link add veth1 netns ${nsrouter} type veth peer name eth0 netns ${ns2} + +ip -net ${nsrouter} link set lo up +ip -net ${nsrouter} link set veth0 up +ip -net ${nsrouter} addr add 10.0.1.1/24 dev veth0 +ip -net ${nsrouter} addr add dead:1::1/64 dev veth0 + +ip -net ${nsrouter} link set veth1 up +ip -net ${nsrouter} addr add 10.0.2.1/24 dev veth1 +ip -net ${nsrouter} addr add dead:2::1/64 dev veth1 + +ip -net ${ns1} link set lo up +ip -net ${ns1} link set eth0 up + +ip -net ${ns2} link set lo up +ip -net ${ns2} link set eth0 up + +ip -net ${ns1} addr add 10.0.1.99/24 dev eth0 +ip -net ${ns1} addr add dead:1::99/64 dev eth0 +ip -net ${ns1} route add default via 10.0.1.1 +ip -net ${ns1} route add default via dead:1::1 + +ip -net ${ns2} addr add 10.0.2.99/24 dev eth0 +ip -net ${ns2} addr add dead:2::99/64 dev eth0 +ip -net ${ns2} route add default via 10.0.2.1 +ip -net ${ns2} route add default via dead:2::1 + +test_ping() { + local daddr4=$1 + local daddr6=$2 + + ip netns exec ${ns1} ping -c 1 -q $daddr4 > /dev/null + ret=$? + if [ $ret -ne 0 ];then + check_drops + echo "FAIL: ${ns1} cannot reach $daddr4, ret $ret" 1>&2 + return 1 + fi + + ip netns exec ${ns1} ping -c 3 -q $daddr6 > /dev/null + ret=$? + if [ $ret -ne 0 ];then + check_drops + echo "FAIL: ${ns1} cannot reach $daddr6, ret $ret" 1>&2 + return 1 + fi + + return 0 +} + +ip netns exec ${nsrouter} sysctl net.ipv6.conf.all.forwarding=1 > /dev/null +ip netns exec ${nsrouter} sysctl net.ipv4.conf.veth0.forwarding=1 > /dev/null +ip netns exec ${nsrouter} sysctl net.ipv4.conf.veth1.forwarding=1 > /dev/null + +sleep 3 + +test_ping 10.0.2.1 dead:2::1 || exit 1 +check_drops || exit 1 + +test_ping 10.0.2.99 dead:2::99 || exit 1 +check_drops || exit 1 + +echo "PASS: fib expression did not cause unwanted packet drops" + +ip netns exec ${nsrouter} nft flush table inet filter + +ip -net ${ns1} route del default +ip -net ${ns1} -6 route del default + +ip -net ${ns1} addr del 10.0.1.99/24 dev eth0 +ip -net ${ns1} addr del dead:1::99/64 dev eth0 + +ip -net ${ns1} addr add 10.0.2.99/24 dev eth0 +ip -net ${ns1} addr add dead:2::99/64 dev eth0 + +ip -net ${ns1} route add default via 10.0.2.1 +ip -net ${ns1} -6 route add default via dead:2::1 + +ip -net ${nsrouter} addr add dead:2::1/64 dev veth0 + +# switch to ruleset that doesn't log, this time +# its expected that this does drop the packets. +load_ruleset_count ${nsrouter} + +# ns1 has a default route, but nsrouter does not. +# must not check return value, ping to 1.1.1.1 will +# fail. +check_fib_counter 0 ${nsrouter} 1.1.1.1 || exit 1 +check_fib_counter 0 ${nsrouter} 1c3::c01d || exit 1 + +ip netns exec ${ns1} ping -c 1 -W 1 -q 1.1.1.1 > /dev/null +check_fib_counter 1 ${nsrouter} 1.1.1.1 || exit 1 + +sleep 2 +ip netns exec ${ns1} ping -c 3 -q 1c3::c01d > /dev/null +check_fib_counter 3 ${nsrouter} 1c3::c01d || exit 1 + +exit 0 diff --git a/tools/testing/selftests/perf_events/sigtrap_threads.c b/tools/testing/selftests/perf_events/sigtrap_threads.c index 78ddf5e11625..8e83cf91513a 100644 --- a/tools/testing/selftests/perf_events/sigtrap_threads.c +++ b/tools/testing/selftests/perf_events/sigtrap_threads.c @@ -43,7 +43,7 @@ static struct { siginfo_t first_siginfo; /* First observed siginfo_t. */ } ctx; -/* Unique value to check si_perf is correctly set from perf_event_attr::sig_data. */ +/* Unique value to check si_perf_data is correctly set from perf_event_attr::sig_data. */ #define TEST_SIG_DATA(addr) (~(unsigned long)(addr)) static struct perf_event_attr make_event_attr(bool enabled, volatile void *addr) @@ -164,8 +164,8 @@ TEST_F(sigtrap_threads, enable_event) EXPECT_EQ(ctx.signal_count, NUM_THREADS); EXPECT_EQ(ctx.tids_want_signal, 0); EXPECT_EQ(ctx.first_siginfo.si_addr, &ctx.iterate_on); - EXPECT_EQ(ctx.first_siginfo.si_errno, PERF_TYPE_BREAKPOINT); - EXPECT_EQ(ctx.first_siginfo.si_perf, TEST_SIG_DATA(&ctx.iterate_on)); + EXPECT_EQ(ctx.first_siginfo.si_perf_type, PERF_TYPE_BREAKPOINT); + EXPECT_EQ(ctx.first_siginfo.si_perf_data, TEST_SIG_DATA(&ctx.iterate_on)); /* Check enabled for parent. */ ctx.iterate_on = 0; @@ -183,8 +183,8 @@ TEST_F(sigtrap_threads, modify_and_enable_event) EXPECT_EQ(ctx.signal_count, NUM_THREADS); EXPECT_EQ(ctx.tids_want_signal, 0); EXPECT_EQ(ctx.first_siginfo.si_addr, &ctx.iterate_on); - EXPECT_EQ(ctx.first_siginfo.si_errno, PERF_TYPE_BREAKPOINT); - EXPECT_EQ(ctx.first_siginfo.si_perf, TEST_SIG_DATA(&ctx.iterate_on)); + EXPECT_EQ(ctx.first_siginfo.si_perf_type, PERF_TYPE_BREAKPOINT); + EXPECT_EQ(ctx.first_siginfo.si_perf_data, TEST_SIG_DATA(&ctx.iterate_on)); /* Check enabled for parent. */ ctx.iterate_on = 0; @@ -203,8 +203,8 @@ TEST_F(sigtrap_threads, signal_stress) EXPECT_EQ(ctx.signal_count, NUM_THREADS * ctx.iterate_on); EXPECT_EQ(ctx.tids_want_signal, 0); EXPECT_EQ(ctx.first_siginfo.si_addr, &ctx.iterate_on); - EXPECT_EQ(ctx.first_siginfo.si_errno, PERF_TYPE_BREAKPOINT); - EXPECT_EQ(ctx.first_siginfo.si_perf, TEST_SIG_DATA(&ctx.iterate_on)); + EXPECT_EQ(ctx.first_siginfo.si_perf_type, PERF_TYPE_BREAKPOINT); + EXPECT_EQ(ctx.first_siginfo.si_perf_data, TEST_SIG_DATA(&ctx.iterate_on)); } TEST_HARNESS_MAIN diff --git a/tools/testing/selftests/proc/.gitignore b/tools/testing/selftests/proc/.gitignore index bed4b5318a86..8f3e72e626fa 100644 --- a/tools/testing/selftests/proc/.gitignore +++ b/tools/testing/selftests/proc/.gitignore @@ -10,6 +10,7 @@ /proc-self-map-files-002 /proc-self-syscall /proc-self-wchan +/proc-subset-pid /proc-uptime-001 /proc-uptime-002 /read diff --git a/tools/testing/selftests/seccomp/seccomp_bpf.c b/tools/testing/selftests/seccomp/seccomp_bpf.c index 98c3b647f54d..e3d5c77a8612 100644 --- a/tools/testing/selftests/seccomp/seccomp_bpf.c +++ b/tools/testing/selftests/seccomp/seccomp_bpf.c @@ -1753,16 +1753,25 @@ TEST_F(TRACE_poke, getpid_runs_normally) # define SYSCALL_RET_SET(_regs, _val) \ do { \ typeof(_val) _result = (_val); \ - /* \ - * A syscall error is signaled by CR0 SO bit \ - * and the code is stored as a positive value. \ - */ \ - if (_result < 0) { \ - SYSCALL_RET(_regs) = -_result; \ - (_regs).ccr |= 0x10000000; \ - } else { \ + if ((_regs.trap & 0xfff0) == 0x3000) { \ + /* \ + * scv 0 system call uses -ve result \ + * for error, so no need to adjust. \ + */ \ SYSCALL_RET(_regs) = _result; \ - (_regs).ccr &= ~0x10000000; \ + } else { \ + /* \ + * A syscall error is signaled by the \ + * CR0 SO bit and the code is stored as \ + * a positive value. \ + */ \ + if (_result < 0) { \ + SYSCALL_RET(_regs) = -_result; \ + (_regs).ccr |= 0x10000000; \ + } else { \ + SYSCALL_RET(_regs) = _result; \ + (_regs).ccr &= ~0x10000000; \ + } \ } \ } while (0) # define SYSCALL_RET_SET_ON_PTRACE_EXIT diff --git a/tools/testing/selftests/tc-testing/plugin-lib/scapyPlugin.py b/tools/testing/selftests/tc-testing/plugin-lib/scapyPlugin.py index 229ee185b27e..254136e3da5a 100644 --- a/tools/testing/selftests/tc-testing/plugin-lib/scapyPlugin.py +++ b/tools/testing/selftests/tc-testing/plugin-lib/scapyPlugin.py @@ -29,22 +29,26 @@ class SubPlugin(TdcPlugin): return # Check for required fields - scapyinfo = self.args.caseinfo['scapy'] - scapy_keys = ['iface', 'count', 'packet'] - missing_keys = [] - keyfail = False - for k in scapy_keys: - if k not in scapyinfo: - keyfail = True - missing_keys.add(k) - if keyfail: - print('{}: Scapy block present in the test, but is missing info:' - .format(self.sub_class)) - print('{}'.format(missing_keys)) + lscapyinfo = self.args.caseinfo['scapy'] + if type(lscapyinfo) != list: + lscapyinfo = [ lscapyinfo, ] - pkt = eval(scapyinfo['packet']) - if '$' in scapyinfo['iface']: - tpl = Template(scapyinfo['iface']) - scapyinfo['iface'] = tpl.safe_substitute(NAMES) - for count in range(scapyinfo['count']): - sendp(pkt, iface=scapyinfo['iface']) + for scapyinfo in lscapyinfo: + scapy_keys = ['iface', 'count', 'packet'] + missing_keys = [] + keyfail = False + for k in scapy_keys: + if k not in scapyinfo: + keyfail = True + missing_keys.append(k) + if keyfail: + print('{}: Scapy block present in the test, but is missing info:' + .format(self.sub_class)) + print('{}'.format(missing_keys)) + + pkt = eval(scapyinfo['packet']) + if '$' in scapyinfo['iface']: + tpl = Template(scapyinfo['iface']) + scapyinfo['iface'] = tpl.safe_substitute(NAMES) + for count in range(scapyinfo['count']): + sendp(pkt, iface=scapyinfo['iface']) diff --git a/tools/testing/selftests/tc-testing/tc-tests/actions/ct.json b/tools/testing/selftests/tc-testing/tc-tests/actions/ct.json index 4202e95e27b9..bd843ab00a58 100644 --- a/tools/testing/selftests/tc-testing/tc-tests/actions/ct.json +++ b/tools/testing/selftests/tc-testing/tc-tests/actions/ct.json @@ -406,5 +406,50 @@ "teardown": [ "$TC actions flush action ct" ] + }, + { + "id": "3992", + "name": "Add ct action triggering DNAT tuple conflict", + "category": [ + "actions", + "ct", + "scapy" + ], + "plugins": { + "requires": [ + "nsPlugin", + "scapyPlugin" + ] + }, + "setup": [ + [ + "$TC qdisc del dev $DEV1 ingress", + 0, + 1, + 2, + 255 + ], + "$TC qdisc add dev $DEV1 ingress" + ], + "cmdUnderTest": "$TC filter add dev $DEV1 ingress protocol ip prio 1 flower ct_state -trk action ct commit nat dst addr 20.0.0.1 port 10 pipe action drop", + "scapy": [ + { + "iface": "$DEV0", + "count": 1, + "packet": "Ether(type=0x800)/IP(src='10.0.0.10',dst='10.0.0.10')/TCP(sport=5000,dport=10)" + }, + { + "iface": "$DEV0", + "count": 1, + "packet": "Ether(type=0x800)/IP(src='10.0.0.10',dst='10.0.0.20')/TCP(sport=5000,dport=10)" + } + ], + "expExitCode": "0", + "verifyCmd": "cat /proc/net/nf_conntrack", + "matchPattern": "dst=10.0.0.20", + "matchCount": "1", + "teardown": [ + "$TC qdisc del dev $DEV1 ingress" + ] } ] diff --git a/tools/testing/selftests/tc-testing/tc-tests/actions/vlan.json b/tools/testing/selftests/tc-testing/tc-tests/actions/vlan.json index 41d783254b08..2aad4caa8581 100644 --- a/tools/testing/selftests/tc-testing/tc-tests/actions/vlan.json +++ b/tools/testing/selftests/tc-testing/tc-tests/actions/vlan.json @@ -446,6 +446,30 @@ "teardown": [] }, { + "id": "ba5b", + "name": "Add vlan modify action for protocol 802.1Q setting priority 0", + "category": [ + "actions", + "vlan" + ], + "setup": [ + [ + "$TC actions flush action vlan", + 0, + 1, + 255 + ] + ], + "cmdUnderTest": "$TC actions add action vlan modify protocol 802.1Q id 5 priority 0 index 100", + "expExitCode": "0", + "verifyCmd": "$TC actions get action vlan index 100", + "matchPattern": "action order [0-9]+: vlan.*modify id 100 priority 0 protocol 802.1Q pipe.*index 100 ref", + "matchCount": "0", + "teardown": [ + "$TC actions flush action vlan" + ] + }, + { "id": "6812", "name": "Add vlan modify action for protocol 802.1Q", "category": [ @@ -463,7 +487,7 @@ "cmdUnderTest": "$TC actions add action vlan modify protocol 802.1Q id 5 index 100", "expExitCode": "0", "verifyCmd": "$TC actions get action vlan index 100", - "matchPattern": "action order [0-9]+: vlan.*modify id 100 protocol 802.1Q priority 0 pipe.*index 100 ref", + "matchPattern": "action order [0-9]+: vlan.*modify id 100 protocol 802.1Q pipe.*index 100 ref", "matchCount": "0", "teardown": [ "$TC actions flush action vlan" @@ -487,7 +511,7 @@ "cmdUnderTest": "$TC actions add action vlan modify protocol 802.1ad id 500 reclassify index 12", "expExitCode": "0", "verifyCmd": "$TC actions get action vlan index 12", - "matchPattern": "action order [0-9]+: vlan.*modify id 500 protocol 802.1ad priority 0 reclassify.*index 12 ref", + "matchPattern": "action order [0-9]+: vlan.*modify id 500 protocol 802.1ad reclassify.*index 12 ref", "matchCount": "1", "teardown": [ "$TC actions flush action vlan" diff --git a/tools/testing/selftests/tc-testing/tc-tests/qdiscs/fq_pie.json b/tools/testing/selftests/tc-testing/tc-tests/qdiscs/fq_pie.json index 1cda2e11b3ad..773c5027553d 100644 --- a/tools/testing/selftests/tc-testing/tc-tests/qdiscs/fq_pie.json +++ b/tools/testing/selftests/tc-testing/tc-tests/qdiscs/fq_pie.json @@ -9,11 +9,11 @@ "setup": [ "$IP link add dev $DUMMY type dummy || /bin/true" ], - "cmdUnderTest": "$TC qdisc add dev $DUMMY root fq_pie flows 65536", - "expExitCode": "2", + "cmdUnderTest": "$TC qdisc add dev $DUMMY handle 1: root fq_pie flows 65536", + "expExitCode": "0", "verifyCmd": "$TC qdisc show dev $DUMMY", - "matchPattern": "qdisc", - "matchCount": "0", + "matchPattern": "qdisc fq_pie 1: root refcnt 2 limit 10240p flows 65536", + "matchCount": "1", "teardown": [ "$IP link del dev $DUMMY" ] diff --git a/tools/testing/selftests/wireguard/netns.sh b/tools/testing/selftests/wireguard/netns.sh index 7ed7cd95e58f..ebc4ee0fe179 100755 --- a/tools/testing/selftests/wireguard/netns.sh +++ b/tools/testing/selftests/wireguard/netns.sh @@ -363,6 +363,7 @@ ip1 -6 rule add table main suppress_prefixlength 0 ip1 -4 route add default dev wg0 table 51820 ip1 -4 rule add not fwmark 51820 table 51820 ip1 -4 rule add table main suppress_prefixlength 0 +n1 bash -c 'printf 0 > /proc/sys/net/ipv4/conf/vethc/rp_filter' # Flood the pings instead of sending just one, to trigger routing table reference counting bugs. n1 ping -W 1 -c 100 -f 192.168.99.7 n1 ping -W 1 -c 100 -f abab::1111 diff --git a/tools/testing/selftests/wireguard/qemu/kernel.config b/tools/testing/selftests/wireguard/qemu/kernel.config index 4eecb432a66c..74db83a0aedd 100644 --- a/tools/testing/selftests/wireguard/qemu/kernel.config +++ b/tools/testing/selftests/wireguard/qemu/kernel.config @@ -19,7 +19,6 @@ CONFIG_NETFILTER_XTABLES=y CONFIG_NETFILTER_XT_NAT=y CONFIG_NETFILTER_XT_MATCH_LENGTH=y CONFIG_NETFILTER_XT_MARK=y -CONFIG_NF_CONNTRACK_IPV4=y CONFIG_NF_NAT_IPV4=y CONFIG_IP_NF_IPTABLES=y CONFIG_IP_NF_FILTER=y diff --git a/tools/testing/vsock/util.c b/tools/testing/vsock/util.c index 93cbd6f603f9..2acbb7703c6a 100644 --- a/tools/testing/vsock/util.c +++ b/tools/testing/vsock/util.c @@ -84,7 +84,7 @@ void vsock_wait_remote_close(int fd) } /* Connect to <cid, port> and return the file descriptor. */ -int vsock_stream_connect(unsigned int cid, unsigned int port) +static int vsock_connect(unsigned int cid, unsigned int port, int type) { union { struct sockaddr sa; @@ -101,7 +101,7 @@ int vsock_stream_connect(unsigned int cid, unsigned int port) control_expectln("LISTENING"); - fd = socket(AF_VSOCK, SOCK_STREAM, 0); + fd = socket(AF_VSOCK, type, 0); timeout_begin(TIMEOUT); do { @@ -120,11 +120,21 @@ int vsock_stream_connect(unsigned int cid, unsigned int port) return fd; } +int vsock_stream_connect(unsigned int cid, unsigned int port) +{ + return vsock_connect(cid, port, SOCK_STREAM); +} + +int vsock_seqpacket_connect(unsigned int cid, unsigned int port) +{ + return vsock_connect(cid, port, SOCK_SEQPACKET); +} + /* Listen on <cid, port> and return the first incoming connection. The remote * address is stored to clientaddrp. clientaddrp may be NULL. */ -int vsock_stream_accept(unsigned int cid, unsigned int port, - struct sockaddr_vm *clientaddrp) +static int vsock_accept(unsigned int cid, unsigned int port, + struct sockaddr_vm *clientaddrp, int type) { union { struct sockaddr sa; @@ -145,7 +155,7 @@ int vsock_stream_accept(unsigned int cid, unsigned int port, int client_fd; int old_errno; - fd = socket(AF_VSOCK, SOCK_STREAM, 0); + fd = socket(AF_VSOCK, type, 0); if (bind(fd, &addr.sa, sizeof(addr.svm)) < 0) { perror("bind"); @@ -189,6 +199,18 @@ int vsock_stream_accept(unsigned int cid, unsigned int port, return client_fd; } +int vsock_stream_accept(unsigned int cid, unsigned int port, + struct sockaddr_vm *clientaddrp) +{ + return vsock_accept(cid, port, clientaddrp, SOCK_STREAM); +} + +int vsock_seqpacket_accept(unsigned int cid, unsigned int port, + struct sockaddr_vm *clientaddrp) +{ + return vsock_accept(cid, port, clientaddrp, SOCK_SEQPACKET); +} + /* Transmit one byte and check the return value. * * expected_ret: diff --git a/tools/testing/vsock/util.h b/tools/testing/vsock/util.h index e53dd09d26d9..a3375ad2fb7f 100644 --- a/tools/testing/vsock/util.h +++ b/tools/testing/vsock/util.h @@ -36,8 +36,11 @@ struct test_case { void init_signals(void); unsigned int parse_cid(const char *str); int vsock_stream_connect(unsigned int cid, unsigned int port); +int vsock_seqpacket_connect(unsigned int cid, unsigned int port); int vsock_stream_accept(unsigned int cid, unsigned int port, struct sockaddr_vm *clientaddrp); +int vsock_seqpacket_accept(unsigned int cid, unsigned int port, + struct sockaddr_vm *clientaddrp); void vsock_wait_remote_close(int fd); void send_byte(int fd, int expected_ret, int flags); void recv_byte(int fd, int expected_ret, int flags); diff --git a/tools/testing/vsock/vsock_test.c b/tools/testing/vsock/vsock_test.c index 5a4fb80fa832..67766bfe176f 100644 --- a/tools/testing/vsock/vsock_test.c +++ b/tools/testing/vsock/vsock_test.c @@ -14,6 +14,8 @@ #include <errno.h> #include <unistd.h> #include <linux/kernel.h> +#include <sys/types.h> +#include <sys/socket.h> #include "timeout.h" #include "control.h" @@ -279,6 +281,110 @@ static void test_stream_msg_peek_server(const struct test_opts *opts) close(fd); } +#define MESSAGES_CNT 7 +static void test_seqpacket_msg_bounds_client(const struct test_opts *opts) +{ + int fd; + + fd = vsock_seqpacket_connect(opts->peer_cid, 1234); + if (fd < 0) { + perror("connect"); + exit(EXIT_FAILURE); + } + + /* Send several messages, one with MSG_EOR flag */ + for (int i = 0; i < MESSAGES_CNT; i++) + send_byte(fd, 1, 0); + + control_writeln("SENDDONE"); + close(fd); +} + +static void test_seqpacket_msg_bounds_server(const struct test_opts *opts) +{ + int fd; + char buf[16]; + struct msghdr msg = {0}; + struct iovec iov = {0}; + + fd = vsock_seqpacket_accept(VMADDR_CID_ANY, 1234, NULL); + if (fd < 0) { + perror("accept"); + exit(EXIT_FAILURE); + } + + control_expectln("SENDDONE"); + iov.iov_base = buf; + iov.iov_len = sizeof(buf); + msg.msg_iov = &iov; + msg.msg_iovlen = 1; + + for (int i = 0; i < MESSAGES_CNT; i++) { + if (recvmsg(fd, &msg, 0) != 1) { + perror("message bound violated"); + exit(EXIT_FAILURE); + } + } + + close(fd); +} + +#define MESSAGE_TRUNC_SZ 32 +static void test_seqpacket_msg_trunc_client(const struct test_opts *opts) +{ + int fd; + char buf[MESSAGE_TRUNC_SZ]; + + fd = vsock_seqpacket_connect(opts->peer_cid, 1234); + if (fd < 0) { + perror("connect"); + exit(EXIT_FAILURE); + } + + if (send(fd, buf, sizeof(buf), 0) != sizeof(buf)) { + perror("send failed"); + exit(EXIT_FAILURE); + } + + control_writeln("SENDDONE"); + close(fd); +} + +static void test_seqpacket_msg_trunc_server(const struct test_opts *opts) +{ + int fd; + char buf[MESSAGE_TRUNC_SZ / 2]; + struct msghdr msg = {0}; + struct iovec iov = {0}; + + fd = vsock_seqpacket_accept(VMADDR_CID_ANY, 1234, NULL); + if (fd < 0) { + perror("accept"); + exit(EXIT_FAILURE); + } + + control_expectln("SENDDONE"); + iov.iov_base = buf; + iov.iov_len = sizeof(buf); + msg.msg_iov = &iov; + msg.msg_iovlen = 1; + + ssize_t ret = recvmsg(fd, &msg, MSG_TRUNC); + + if (ret != MESSAGE_TRUNC_SZ) { + printf("%zi\n", ret); + perror("MSG_TRUNC doesn't work"); + exit(EXIT_FAILURE); + } + + if (!(msg.msg_flags & MSG_TRUNC)) { + fprintf(stderr, "MSG_TRUNC expected\n"); + exit(EXIT_FAILURE); + } + + close(fd); +} + static struct test_case test_cases[] = { { .name = "SOCK_STREAM connection reset", @@ -309,6 +415,16 @@ static struct test_case test_cases[] = { .run_client = test_stream_msg_peek_client, .run_server = test_stream_msg_peek_server, }, + { + .name = "SOCK_SEQPACKET msg bounds", + .run_client = test_seqpacket_msg_bounds_client, + .run_server = test_seqpacket_msg_bounds_server, + }, + { + .name = "SOCK_SEQPACKET MSG_TRUNC flag", + .run_client = test_seqpacket_msg_trunc_client, + .run_server = test_seqpacket_msg_trunc_server, + }, {}, }; |
